Parallelizer.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
5 //
6 // Eigen is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU Lesser General Public
8 // License as published by the Free Software Foundation; either
9 // version 3 of the License, or (at your option) any later version.
10 //
11 // Alternatively, you can redistribute it and/or
12 // modify it under the terms of the GNU General Public License as
13 // published by the Free Software Foundation; either version 2 of
14 // the License, or (at your option) any later version.
15 //
16 // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
17 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
19 // GNU General Public License for more details.
20 //
21 // You should have received a copy of the GNU Lesser General Public
22 // License and a copy of the GNU General Public License along with
23 // Eigen. If not, see <http://www.gnu.org/licenses/>.
24 
25 #ifndef EIGEN_PARALLELIZER_H
26 #define EIGEN_PARALLELIZER_H
27 
28 namespace Eigen {
29 
30 namespace internal {
31 
33 inline void manage_multi_threading(Action action, int* v)
34 {
35  static EIGEN_UNUSED int m_maxThreads = -1;
36 
37  if(action==SetAction)
38  {
40  m_maxThreads = *v;
41  }
42  else if(action==GetAction)
43  {
45  #ifdef EIGEN_HAS_OPENMP
46  if(m_maxThreads>0)
47  *v = m_maxThreads;
48  else
49  *v = omp_get_max_threads();
50  #else
51  *v = 1;
52  #endif
53  }
54  else
55  {
56  eigen_internal_assert(false);
57  }
58 }
59 
60 }
61 
// Must be called by the user before launching worker threads: it forces the
// one-time initialization of Eigen's internal shared state while still
// single-threaded, instead of racing on first use.
// NOTE(review): in this listing `nbt`, `l1` and `l2` are declared but never
// used — the calls at listing lines 66 and 68 (presumably the thread-count
// and cache-size queries whose results these locals receive) appear to have
// been dropped by the documentation extractor. Confirm against the original
// Parallelizer.h before relying on this listing.
63 inline void initParallel()
64 {
65  int nbt;
67  std::ptrdiff_t l1, l2;
69 }
70 
73 inline int nbThreads()
74 {
75  int ret;
77  return ret;
78 }
79 
82 inline void setNbThreads(int v)
83 {
85 }
86 
87 namespace internal {
88 
/** \internal
  * Per-thread bookkeeping for a parallelized matrix product: two counters
  * used to synchronize the workers, plus the slice of the right-hand side
  * owned by this thread.
  */
template<typename Index> struct GemmParallelInfo
{
  // volatile: these counters are read/written across OpenMP threads.
  int volatile sync;   // -1 until the thread publishes its progress
  int volatile users;  // number of threads currently using this slice

  Index rhs_start;     // first rhs column owned by this thread
  Index rhs_length;    // number of rhs columns owned by this thread

  GemmParallelInfo()
    : sync(-1),
      users(0),
      rhs_start(0),
      rhs_length(0)
  {}
};
99 
// Runs the product functor `func(row0, nRows, col0, nCols [, info])` either
// serially or split across OpenMP threads. Threading is used only when
// `Condition` holds, we are not already inside a parallel region, and the
// problem is large enough; otherwise a single serial call is made.
100 template<bool Condition, typename Functor, typename Index>
101 void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
102 {
103  // TODO when EIGEN_USE_BLAS is defined,
104  // we should still enable OMP for other scalar types
105 #if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
106  // FIXME the transpose variable is only needed to properly split
107  // the matrix product when multithreading is enabled. This is a temporary
108  // fix to support row-major destination matrices. This whole
109  // parallelizer mechanism has to be redisigned anyway.
110  EIGEN_UNUSED_VARIABLE(transpose);
111  func(0,rows, 0,cols);
112 #else
113
114  // Dynamically check whether we should enable or disable OpenMP.
115  // The conditions are:
116  // - the max number of threads we can create is greater than 1
117  // - we are not already in a parallel code
118  // - the sizes are large enough
119
120  // 1- are we already in a parallel session?
121  // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?
122  if((!Condition) || (omp_get_num_threads()>1))
123  return func(0,rows, 0,cols);
124
// The split dimension is the one the destination is traversed along.
125  Index size = transpose ? cols : rows;
126
127  // 2- compute the maximal number of threads from the size of the product:
128  // FIXME this has to be fine tuned
// Require at least 32 rows/cols of work per thread.
129  Index max_threads = std::max<Index>(1,size / 32);
130
131  // 3 - compute the number of threads we are going to use
132  Index threads = std::min<Index>(nbThreads(), max_threads);
133
134  if(threads==1)
135  return func(0,rows, 0,cols);
136
// NOTE(review): listing line 137 is absent here (likely stripped by the
// documentation extractor); confirm against the original file what it did.
138  func.initParallelSession();
139
// From here on, (rows, cols) are in the orientation the split is done in.
140  if(transpose)
141  std::swap(rows,cols);
142
// Per-thread slice sizes, rounded down to multiples of 4 columns and
// 8 rows via the bit masks.
143  Index blockCols = (cols / threads) & ~Index(0x3);
144  Index blockRows = (rows / threads) & ~Index(0x7);
145
// One shared info entry per thread; the workers use them to coordinate.
146  GemmParallelInfo<Index>* info = new GemmParallelInfo<Index>[threads];
147
148  #pragma omp parallel for schedule(static,1) num_threads(threads)
149  for(Index i=0; i<threads; ++i)
150  {
// The last thread absorbs the remainder left by the rounding above.
151  Index r0 = i*blockRows;
152  Index actualBlockRows = (i+1==threads) ? rows-r0 : blockRows;
153
154  Index c0 = i*blockCols;
155  Index actualBlockCols = (i+1==threads) ? cols-c0 : blockCols;
156
// Publish this thread's rhs slice so the other workers can see it.
157  info[i].rhs_start = c0;
158  info[i].rhs_length = actualBlockCols;
159
// In the transposed case the roles of the row/column arguments swap.
160  if(transpose)
161  func(0, cols, r0, actualBlockRows, info);
162  else
163  func(r0, actualBlockRows, 0,cols, info);
164  }
165
// Implicit barrier at the end of the omp-for: safe to free the shared array.
166  delete[] info;
167 #endif
168 }
169 
170 } // end namespace internal
171 
172 } // end namespace Eigen
173 
174 #endif // EIGEN_PARALLELIZER_H