TR-mbed 1.0
Loading...
Searching...
No Matches
TriangularMatrixMatrix.h
Go to the documentation of this file.
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
5//
6// This Source Code Form is subject to the terms of the Mozilla
7// Public License v. 2.0. If a copy of the MPL was not distributed
8// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H
11#define EIGEN_TRIANGULAR_MATRIX_MATRIX_H
12
13namespace Eigen {
14
15namespace internal {
16
17// template<typename Scalar, int mr, int StorageOrder, bool Conjugate, int Mode>
18// struct gemm_pack_lhs_triangular
19// {
20// Matrix<Scalar,mr,mr,
21// void operator()(Scalar* blockA, const EIGEN_RESTRICT Scalar* _lhs, int lhsStride, int depth, int rows)
22// {
23// conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
24// const_blas_data_mapper<Scalar, StorageOrder> lhs(_lhs,lhsStride);
25// int count = 0;
26// const int peeled_mc = (rows/mr)*mr;
27// for(int i=0; i<peeled_mc; i+=mr)
28// {
29// for(int k=0; k<depth; k++)
30// for(int w=0; w<mr; w++)
31// blockA[count++] = cj(lhs(i+w, k));
32// }
33// for(int i=peeled_mc; i<rows; i++)
34// {
35// for(int k=0; k<depth; k++)
36// blockA[count++] = cj(lhs(i, k));
37// }
38// }
39// };
40
41/* Optimized triangular matrix * matrix (_TRMM++) product built on top of
42 * the general matrix matrix product.
43 */
44template <typename Scalar, typename Index,
45 int Mode, bool LhsIsTriangular,
46 int LhsStorageOrder, bool ConjugateLhs,
47 int RhsStorageOrder, bool ConjugateRhs,
48 int ResStorageOrder, int ResInnerStride,
49 int Version = Specialized>
51
52template <typename Scalar, typename Index,
53 int Mode, bool LhsIsTriangular,
56 int ResInnerStride, int Version>
79
80// implements col-major += alpha * op(triangular) * op(general)
81template <typename Scalar, typename Index, int Mode,
82 int LhsStorageOrder, bool ConjugateLhs,
83 int RhsStorageOrder, bool ConjugateRhs,
84 int ResInnerStride, int Version>
88{
89
91 enum {
92 SmallPanelWidth = 2 * EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
93 IsLower = (Mode&Lower) == Lower,
94 SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
95 };
96
97 static EIGEN_DONT_INLINE void run(
99 const Scalar* _lhs, Index lhsStride,
100 const Scalar* _rhs, Index rhsStride,
103};
104
105template <typename Scalar, typename Index, int Mode,
108 int ResInnerStride, int Version>
113 const Scalar* _lhs, Index lhsStride,
114 const Scalar* _rhs, Index rhsStride,
117 {
118 // strip zeros
119 Index diagSize = (std::min)(_rows,_depth);
120 Index rows = IsLower ? _rows : diagSize;
121 Index depth = IsLower ? diagSize : _depth;
122 Index cols = _cols;
123
130
131 Index kc = blocking.kc(); // cache block size along the K direction
132 Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
133 // The small panel size must not be larger than blocking size.
134 // Usually this should never be the case because SmallPanelWidth^2 is very small
135 // compared to L2 cache size, but let's be safe:
136 Index panelWidth = (std::min)(Index(SmallPanelWidth),(std::min)(kc,mc));
137
138 std::size_t sizeA = kc*mc;
139 std::size_t sizeB = kc*cols;
140
143
144 // To work around an "error: member reference base type 'Matrix<...>
145 // (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is
146 // not a structure or union" compilation error in nvcc (tested V8.0.61),
147 // create a dummy internal::constructor_without_unaligned_array_assert
148 // object to pass to the Matrix constructor.
151 triangularBuffer.setZero();
152 if((Mode&ZeroDiag)==ZeroDiag)
153 triangularBuffer.diagonal().setZero();
154 else
155 triangularBuffer.diagonal().setOnes();
156
160
161 for(Index k2=IsLower ? depth : 0;
162 IsLower ? k2>0 : k2<depth;
163 IsLower ? k2-=kc : k2+=kc)
164 {
165 Index actual_kc = (std::min)(IsLower ? k2 : depth-k2, kc);
166 Index actual_k2 = IsLower ? k2-actual_kc : k2;
167
168 // align blocks with the end of the triangular part for trapezoidal lhs
169 if((!IsLower)&&(k2<rows)&&(k2+actual_kc>rows))
170 {
171 actual_kc = rows-k2;
172 k2 = k2+actual_kc-kc;
173 }
174
175 pack_rhs(blockB, rhs.getSubMapper(actual_k2,0), actual_kc, cols);
176
177 // the selected lhs panel has to be split into three different parts:
178 // 1 - the part which is zero => skip it
179 // 2 - the diagonal block => special kernel
180 // 3 - the dense panel below (lower case) or above (upper case) the diagonal block => GEPP
181
182 // the block diagonal, if any:
183 if(IsLower || actual_k2<rows)
184 {
185 // for each small vertical panel of lhs
186 for (Index k1=0; k1<actual_kc; k1+=panelWidth)
187 {
188 Index actualPanelWidth = std::min<Index>(actual_kc-k1, panelWidth);
192
193 // => GEBP with the micro triangular block
194 // The trick is to pack this micro block while filling the opposite triangular part with zeros.
195 // To this end we do an extra triangular copy to a small temporary buffer
196 for (Index k=0;k<actualPanelWidth;++k)
197 {
198 if (SetDiag)
199 triangularBuffer.coeffRef(k,k) = lhs(startBlock+k,startBlock+k);
200 for (Index i=IsLower ? k+1 : 0; IsLower ? i<actualPanelWidth : i<k; ++i)
201 triangularBuffer.coeffRef(i,k) = lhs(startBlock+i,startBlock+k);
202 }
204
205 gebp_kernel(res.getSubMapper(startBlock, 0), blockA, blockB,
208
209 // GEBP with remaining micro panel
210 if (lengthTarget>0)
211 {
213
214 pack_lhs(blockA, lhs.getSubMapper(startTarget,startBlock), actualPanelWidth, lengthTarget);
215
216 gebp_kernel(res.getSubMapper(startTarget, 0), blockA, blockB,
219 }
220 }
221 }
222 // the part below (lower case) or above (upper case) the diagonal => GEPP
223 {
224 Index start = IsLower ? k2 : 0;
225 Index end = IsLower ? rows : (std::min)(actual_k2,rows);
226 for(Index i2=start; i2<end; i2+=mc)
227 {
228 const Index actual_mc = (std::min)(i2+mc,end)-i2;
230 (blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
231
232 gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc,
233 actual_kc, cols, alpha, -1, -1, 0, 0);
234 }
235 }
236 }
237 }
238
239// implements col-major += alpha * op(general) * op(triangular)
240template <typename Scalar, typename Index, int Mode,
243 int ResInnerStride, int Version>
247{
249 enum {
250 SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
251 IsLower = (Mode&Lower) == Lower,
252 SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
253 };
254
255 static EIGEN_DONT_INLINE void run(
257 const Scalar* _lhs, Index lhsStride,
258 const Scalar* _rhs, Index rhsStride,
261};
262
263template <typename Scalar, typename Index, int Mode,
266 int ResInnerStride, int Version>
271 const Scalar* _lhs, Index lhsStride,
272 const Scalar* _rhs, Index rhsStride,
275 {
277 // strip zeros
278 Index diagSize = (std::min)(_cols,_depth);
279 Index rows = _rows;
280 Index depth = IsLower ? _depth : diagSize;
281 Index cols = IsLower ? diagSize : _cols;
282
289
290 Index kc = blocking.kc(); // cache block size along the K direction
291 Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
292
293 std::size_t sizeA = kc*mc;
294 std::size_t sizeB = kc*cols+EIGEN_MAX_ALIGN_BYTES/sizeof(Scalar);
295
298
301 triangularBuffer.setZero();
302 if((Mode&ZeroDiag)==ZeroDiag)
303 triangularBuffer.diagonal().setZero();
304 else
305 triangularBuffer.diagonal().setOnes();
306
311
312 for(Index k2=IsLower ? 0 : depth;
313 IsLower ? k2<depth : k2>0;
314 IsLower ? k2+=kc : k2-=kc)
315 {
316 Index actual_kc = (std::min)(IsLower ? depth-k2 : k2, kc);
317 Index actual_k2 = IsLower ? k2 : k2-actual_kc;
318
319 // align blocks with the end of the triangular part for trapezoidal rhs
320 if(IsLower && (k2<cols) && (actual_k2+actual_kc>cols))
321 {
322 actual_kc = cols-k2;
323 k2 = actual_k2 + actual_kc - kc;
324 }
325
326 // remaining size
327 Index rs = IsLower ? (std::min)(cols,actual_k2) : cols - k2;
328 // size of the triangular part
329 Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
330
331 Scalar* geb = blockB+ts*ts;
333
334 pack_rhs(geb, rhs.getSubMapper(actual_k2,IsLower ? 0 : k2), actual_kc, rs);
335
336 // pack the triangular part of the rhs, padding the unrolled blocks with zeros
337 if(ts>0)
338 {
339 for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
340 {
341 Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
343 Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
345 // general part
347 rhs.getSubMapper(actual_k2+panelOffset, actual_j2),
350
351 // append the triangular part via a temporary buffer
352 for (Index j=0;j<actualPanelWidth;++j)
353 {
354 if (SetDiag)
355 triangularBuffer.coeffRef(j,j) = rhs(actual_j2+j,actual_j2+j);
356 for (Index k=IsLower ? j+1 : 0; IsLower ? k<actualPanelWidth : k<j; ++k)
357 triangularBuffer.coeffRef(k,j) = rhs(actual_j2+k,actual_j2+j);
358 }
359
361 RhsMapper(triangularBuffer.data(), triangularBuffer.outerStride()),
363 actual_kc, j2);
364 }
365 }
366
367 for (Index i2=0; i2<rows; i2+=mc)
368 {
369 const Index actual_mc = (std::min)(mc,rows-i2);
370 pack_lhs(blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
371
372 // triangular kernel
373 if(ts>0)
374 {
375 for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
376 {
377 Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
379 Index blockOffset = IsLower ? j2 : 0;
380
381 gebp_kernel(res.getSubMapper(i2, actual_k2 + j2),
382 blockA, blockB+j2*actual_kc,
384 alpha,
385 actual_kc, actual_kc, // strides
386 blockOffset, blockOffset);// offsets
387 }
388 }
389 gebp_kernel(res.getSubMapper(i2, IsLower ? 0 : k2),
390 blockA, geb, actual_mc, actual_kc, rs,
391 alpha,
392 -1, -1, 0, 0);
393 }
394 }
395 }
396
397/***************************************************************************
398* Wrapper to product_triangular_matrix_matrix
399***************************************************************************/
400
401} // end namespace internal
402
403namespace internal {
404template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
406{
407 template<typename Dest> static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha)
408 {
409 typedef typename Lhs::Scalar LhsScalar;
410 typedef typename Rhs::Scalar RhsScalar;
411 typedef typename Dest::Scalar Scalar;
412
413 typedef internal::blas_traits<Lhs> LhsBlasTraits;
414 typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
415 typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
416 typedef internal::blas_traits<Rhs> RhsBlasTraits;
417 typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
418 typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
419
420 typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
421 typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
422
423 LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs);
424 RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs);
426
428 Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;
429
430 enum { IsLower = (Mode&Lower) == Lower };
431 Index stripedRows = ((!LhsIsTriangular) || (IsLower)) ? lhs.rows() : (std::min)(lhs.rows(),lhs.cols());
432 Index stripedCols = ((LhsIsTriangular) || (!IsLower)) ? rhs.cols() : (std::min)(rhs.cols(),rhs.rows());
433 Index stripedDepth = LhsIsTriangular ? ((!IsLower) ? lhs.cols() : (std::min)(lhs.cols(),lhs.rows()))
434 : ((IsLower) ? rhs.rows() : (std::min)(rhs.rows(),rhs.cols()));
435
437
439 Mode, LhsIsTriangular,
442 (internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor, Dest::InnerStrideAtCompileTime>
443 ::run(
445 &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
446 &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
447 &dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(), // result info
449 );
450
451 // Apply correction if the diagonal is unit and a scalar factor was nested:
452 if ((Mode&UnitDiag)==UnitDiag)
453 {
454 if (LhsIsTriangular && lhs_alpha!=LhsScalar(1))
455 {
456 Index diagSize = (std::min)(lhs.rows(),lhs.cols());
457 dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize);
458 }
459 else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1))
460 {
461 Index diagSize = (std::min)(rhs.rows(),rhs.cols());
462 dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize);
463 }
464 }
465 }
466};
467
468} // end namespace internal
469
470} // end namespace Eigen
471
472#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H
ArrayXXi a
Definition Array_initializer_list_23_cxx11.cpp:1
int i
Definition BiCGSTAB_step_by_step.cpp:9
#define EIGEN_MAX_ALIGN_BYTES
Definition ConfigureVectorization.h:175
#define EIGEN_PLAIN_ENUM_MAX(a, b)
Definition Macros.h:1289
#define EIGEN_DONT_INLINE
Definition Macros.h:940
#define EIGEN_STRONG_INLINE
Definition Macros.h:917
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER)
Definition Memory.h:768
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
Definition PartialRedux_count.cpp:3
int rows
Definition Tutorial_commainit_02.cpp:1
int cols
Definition Tutorial_commainit_02.cpp:1
SCALAR Scalar
Definition bench_gemm.cpp:46
Definition GeneralMatrixMatrix.h:248
@ UnitDiag
Definition Constants.h:213
@ ZeroDiag
Definition Constants.h:215
@ Lower
Definition Constants.h:209
@ Upper
Definition Constants.h:211
@ ColMajor
Definition Constants.h:319
@ RowMajor
Definition Constants.h:321
const unsigned int RowMajorBit
Definition Constants.h:66
RealScalar alpha
Definition level1_cplx_impl.h:147
@ Lhs
Definition TensorContractionMapper.h:19
@ Rhs
Definition TensorContractionMapper.h:18
Namespace containing all symbols from the Eigen library.
Definition bench_norm.cpp:85
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition Meta.h:74
@ Specialized
Definition Constants.h:310
Definition BandTriangularSolver.h:13
Definition GeneralBlockPanelKernel.h:1058
Definition GenericPacketMath.h:107
static EIGEN_STRONG_INLINE void run(Index rows, Index cols, Index depth, const Scalar *lhs, Index lhsStride, const Scalar *rhs, Index rhsStride, Scalar *res, Index resIncr, Index resStride, const Scalar &alpha, level3_blocking< Scalar, Scalar > &blocking)
Definition TriangularMatrixMatrix.h:61
Definition TriangularMatrixMatrix.h:50
Definition ForwardDeclarations.h:17
static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar &alpha)
Definition TriangularMatrixMatrix.h:407
Definition ProductEvaluators.h:758
std::ptrdiff_t j
Definition tut_arithmetic_redux_minmax.cpp:2