TR-mbed 1.0
Loading...
Searching...
No Matches
Parallelizer.h
Go to the documentation of this file.
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
5//
6// This Source Code Form is subject to the terms of the Mozilla
7// Public License v. 2.0. If a copy of the MPL was not distributed
8// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10#ifndef EIGEN_PARALLELIZER_H
11#define EIGEN_PARALLELIZER_H
12
13#if EIGEN_HAS_CXX11_ATOMIC
14#include <atomic>
15#endif
16
17namespace Eigen {
18
19namespace internal {
20
23{
24 static int m_maxThreads = -1;
26
27 if(action==SetAction)
28 {
30 m_maxThreads = *v;
31 }
32 else if(action==GetAction)
33 {
35 #ifdef EIGEN_HAS_OPENMP
36 if(m_maxThreads>0)
37 *v = m_maxThreads;
38 else
40 #else
41 *v = 1;
42 #endif
43 }
44 else
45 {
47 }
48}
49
50}
51
53inline void initParallel()
54{
55 int nbt;
57 std::ptrdiff_t l1, l2, l3;
59}
60
63inline int nbThreads()
64{
65 int ret;
67 return ret;
68}
69
76
77namespace internal {
78
79template<typename Index> struct GemmParallelInfo
80{
82
83 // volatile is not enough on all architectures (see bug 1572)
84 // to guarantee that, when thread A tells thread B that it is
85 // done with packing a block, all writes have really been
86 // carried out. The C++11 memory model plus std::atomic guarantees this.
87#if EIGEN_HAS_CXX11_ATOMIC
88 std::atomic<Index> sync;
89 std::atomic<int> users;
90#else
91 Index volatile sync;
92 int volatile users;
93#endif
94
97};
98
99template<bool Condition, typename Functor, typename Index>
100void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose)
101{
102 // TODO when EIGEN_USE_BLAS is defined,
103 // we should still enable OMP for other scalar types
104 // Without C++11, we have to disable GEMM's parallelization on
105 // non-x86 architectures because volatile is not enough for our purpose there.
106 // See bug 1572.
107#if (! defined(EIGEN_HAS_OPENMP)) || defined(EIGEN_USE_BLAS) || ((!EIGEN_HAS_CXX11_ATOMIC) && !(EIGEN_ARCH_i386_OR_x86_64))
108 // FIXME the transpose variable is only needed to properly split
109 // the matrix product when multithreading is enabled. This is a temporary
110 // fix to support row-major destination matrices. This whole
111 // parallelizer mechanism has to be redesigned anyway.
113 EIGEN_UNUSED_VARIABLE(transpose);
114 func(0,rows, 0,cols);
115#else
116
117 // Dynamically check whether we should enable or disable OpenMP.
118 // The conditions are:
119 // - the max number of threads we can create is greater than 1
120 // - we are not already in a parallel code
121 // - the sizes are large enough
122
123 // compute the maximal number of threads from the size of the product:
124 // This first heuristic takes into account that the product kernel is fully optimized when working with nr columns at once.
125 Index size = transpose ? rows : cols;
126 Index pb_max_threads = std::max<Index>(1,size / Functor::Traits::nr);
127
128 // compute the maximal number of threads from the total amount of work:
129 double work = static_cast<double>(rows) * static_cast<double>(cols) *
130 static_cast<double>(depth);
131 double kMinTaskSize = 50000; // FIXME improve this heuristic.
132 pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, static_cast<Index>( work / kMinTaskSize ) ));
133
134 // compute the number of threads we are going to use
135 Index threads = std::min<Index>(nbThreads(), pb_max_threads);
136
137 // if multi-threading is explicitly disabled, not useful, or if we are already in a parallel session,
138 // then abort multi-threading
139 // FIXME omp_get_num_threads()>1 only works for OpenMP; what if the user does not use OpenMP?
140 if((!Condition) || (threads==1) || (omp_get_num_threads()>1))
141 return func(0,rows, 0,cols);
142
144 func.initParallelSession(threads);
145
146 if(transpose)
147 std::swap(rows,cols);
148
150
151 #pragma omp parallel num_threads(threads)
152 {
154 // Note that the actual number of threads might be lower than the number of requested ones.
156
157 Index blockCols = (cols / actual_threads) & ~Index(0x3);
158 Index blockRows = (rows / actual_threads);
159 blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
160
161 Index r0 = i*blockRows;
162 Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
163
164 Index c0 = i*blockCols;
165 Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;
166
167 info[i].lhs_start = r0;
168 info[i].lhs_length = actualBlockRows;
169
170 if(transpose) func(c0, actualBlockCols, 0, rows, info);
171 else func(0, rows, c0, actualBlockCols, info);
172 }
173#endif
174}
175
176} // end namespace internal
177
178} // end namespace Eigen
179
180#endif // EIGEN_PARALLELIZER_H
Array< int, Dynamic, 1 > v
Definition Array_initializer_list_vector_cxx11.cpp:1
int i
Definition BiCGSTAB_step_by_step.cpp:9
#define eigen_internal_assert(x)
Definition Macros.h:1043
#define EIGEN_UNUSED_VARIABLE(var)
Definition Macros.h:1076
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER)
Definition Memory.h:768
int rows
Definition Tutorial_commainit_02.cpp:1
int cols
Definition Tutorial_commainit_02.cpp:1
Scalar Scalar int size
Definition benchVecAdd.cpp:17
else if n * info
Definition cholesky.cpp:18
DenseIndex ret
Definition level1_cplx_impl.h:44
void parallelize_gemm(const Functor &func, Index rows, Index cols, Index depth, bool transpose)
Definition Parallelizer.h:100
void manage_multi_threading(Action action, int *v)
Definition Parallelizer.h:22
void manage_caching_sizes(Action action, std::ptrdiff_t *l1, std::ptrdiff_t *l2, std::ptrdiff_t *l3)
Definition GeneralBlockPanelKernel.h:86
Namespace containing all symbols from the Eigen library.
Definition bench_norm.cpp:85
Action
Definition Constants.h:504
@ GetAction
Definition Constants.h:504
@ SetAction
Definition Constants.h:504
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition Meta.h:74
void initParallel()
Definition Parallelizer.h:53
int nbThreads()
Definition Parallelizer.h:63
void setNbThreads(int v)
Definition Parallelizer.h:72
Definition BandTriangularSolver.h:13
Definition Parallelizer.h:80
GemmParallelInfo()
Definition Parallelizer.h:81
Index lhs_length
Definition Parallelizer.h:96
Index lhs_start
Definition Parallelizer.h:95
Index volatile sync
Definition Parallelizer.h:91
int volatile users
Definition Parallelizer.h:92
Definition ForwardDeclarations.h:17
Definition NonLinearOptimization.cpp:118
Definition benchGeometry.cpp:23