#ifndef EIGEN_MATRIX_PRODUCT_MMA_ALTIVEC_H
#define EIGEN_MATRIX_PRODUCT_MMA_ALTIVEC_H

#pragma GCC target("cpu=power10")
#if !__has_builtin(__builtin_vsx_assemble_pair)
#define __builtin_vsx_assemble_pair __builtin_mma_assemble_pair
#endif
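// GCC and LLVM expose the 256-bit pair-assembly builtin under different
// names, hence the fallback above. As a minimal illustrative sketch (not
// part of Eigen), assembling a __vector_pair from two 128-bit vectors looks
// like this, where `hi` and `lo` are hypothetical vector variables:
//
//   __vector_pair pair;
//   __builtin_vsx_assemble_pair(&pair,
//                               (__vector unsigned char)hi,
//                               (__vector unsigned char)lo);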
template<typename Scalar, typename Packet>
EIGEN_ALWAYS_INLINE void bsetzeroMMA(__vector_quad* acc)
{
  __builtin_mma_xxsetaccz(acc); // zero the 512-bit accumulator
}
template<typename DataMapper, typename Index, typename Packet, const Index accCols>
EIGEN_ALWAYS_INLINE void storeAccumulator(Index i, Index j, const DataMapper& data,
                                          const Packet& alpha, __vector_quad* acc)
{
  // ... (body elided in this excerpt)
}
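// A standalone sketch (illustrative only; Eigen's actual implementation goes
// through the DataMapper and PacketBlock machinery) of the pattern
// storeAccumulator performs: spill the 512-bit accumulator into four 128-bit
// rows, then scale by alpha and accumulate into a 4x4 destination tile.
static inline void exampleStoreAcc(float* dst, float alpha, __vector_quad* acc)
{
  __vector float rows[4];
  __builtin_mma_disassemble_acc(rows, acc); // copy out the four accumulator rows
  for (int i = 0; i < 4; i++)
    for (int j = 0; j < 4; j++)
      dst[i*4 + j] += alpha * rows[i][j];   // res += alpha * (A*B) tile
}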
template<typename DataMapper, typename Index, typename Packet, typename Packetc,
         const Index accColsC, int N>
EIGEN_ALWAYS_INLINE void storeComplexAccumulator(Index i, Index j, const DataMapper& data,
    const Packet& alphaReal, const Packet& alphaImag,
    __vector_quad* accReal, __vector_quad* accImag)
{
  // ... (body elided in this excerpt)
}
// Rank-1 update helpers wrapping the MMA "ger" builtins; NegativeAccumulate
// selects the product-negating (*gernp) forms.
template<typename LhsPacket, typename RhsPacket, bool NegativeAccumulate>
EIGEN_ALWAYS_INLINE void pgerMMA(__vector_quad* acc, const RhsPacket& a, const LhsPacket& b)
{
  // ... (float path; body elided in this excerpt)
}

template<typename LhsPacket, typename RhsPacket, bool NegativeAccumulate>
EIGEN_ALWAYS_INLINE void pgerMMA(__vector_quad* acc, const PacketBlock<Packet2d,2>& a, const Packet2d& b)
{
  // ... (double path; body elided)
}

template<typename LhsPacket, typename RhsPacket, bool NegativeAccumulate>
EIGEN_ALWAYS_INLINE void pgerMMA(__vector_quad* acc, const __vector_pair& a, const Packet2d& b)
{
  // ... (double path with a pre-assembled pair; body elided)
}

template<typename LhsPacket, typename RhsPacket, bool NegativeAccumulate>
EIGEN_ALWAYS_INLINE void pgerMMA(__vector_quad* acc, const __vector_pair& a, const Packet4f& b)
{
  // ... (unused combination, present for compilation only; body elided)
}
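// A hedged sketch of what one rank-1 ("ger") update does for float: the
// builtin multiplies a 4-float column by a 4-float row and adds the 4x4
// outer product into the accumulator, so `depth` such updates build a 4x4
// tile of A*B. (exampleGerStep is illustrative, not an Eigen function.)
static inline void exampleGerStep(__vector_quad* acc, __vector float a, __vector float b)
{
  // acc[i][j] += a[i] * b[j]; the *gernp form subtracts the product instead,
  // which is what the NegativeAccumulate=true path selects.
  __builtin_mma_xvf32gerpp(acc, (__vector unsigned char)a, (__vector unsigned char)b);
}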
template<typename Scalar, typename Packet, typename RhsPacket, bool ConjugateLhs,
         bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
EIGEN_ALWAYS_INLINE void pgercMMA(__vector_quad* accReal, __vector_quad* accImag,
    const Packet& lhsV, const Packet& lhsVi, const RhsPacket& rhsV, const RhsPacket& rhsVi)
{
  // ... (body elided in this excerpt)
}
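// The complex product is assembled from real rank-1 updates (fewer when one
// side is real). For the no-conjugate case the identity is
//   (lr + i*li)(rr + i*ri) = (lr*rr - li*ri) + i*(lr*ri + li*rr),
// so accReal takes a positive update with (lr, rr) and a negative one with
// (li, ri), while accImag takes positive updates with (lr, ri) and (li, rr);
// ConjugateLhs/ConjugateRhs flip the signs of the li/ri contributions.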
template<typename Scalar, typename Packet>
EIGEN_ALWAYS_INLINE void ploadRhsMMA(const Scalar* rhs, Packet& rhsV)
{
  // ... (generic packet load; body elided in this excerpt)
}

template<>
EIGEN_ALWAYS_INLINE void ploadRhsMMA<double, __vector_pair>(const double* rhs, __vector_pair& rhsV)
{
  // lxvp loads 32 contiguous bytes (two VSX registers) in a single instruction;
  // "=wa" allows any VSX register, "Y" a memory operand lxvp can address.
  __asm__ ("lxvp %x0,%1" : "=wa" (rhsV) : "Y" (*rhs));
}
#define PEEL_MMA 7  // peel depth; value inferred from the matching PEEL_COMPLEX_MMA below

#define MICRO_MMA_UNROLL(func) \
  func(0) func(1) func(2) func(3) func(4) func(5) func(6) func(7)
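// These micro-kernel macros are pasted, not looped: MICRO_MMA_UNROLL(f)
// expands to f(0) ... f(7), and each helper guards itself with the
// compile-time `unroll_factor > iter` test, which the compiler folds away.
// For example, with unroll_factor == 2, MICRO_MMA_UNROLL(MICRO_MMA_LOAD_ONE)
// (defined just below) reduces to:
//
//   lhsV0 = ploadLhs<Scalar, Packet>(lhs_ptr0); lhs_ptr0 += accCols;
//   lhsV1 = ploadLhs<Scalar, Packet>(lhs_ptr1); lhs_ptr1 += accCols;
//   // iterations 2..7 collapse to EIGEN_UNUSED_VARIABLE no-ops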
#define MICRO_MMA_LOAD_ONE(iter) \
  if (unroll_factor > iter) { \
    lhsV##iter = ploadLhs<Scalar, Packet>(lhs_ptr##iter); \
    lhs_ptr##iter += accCols; \
  } else { \
    EIGEN_UNUSED_VARIABLE(lhsV##iter); \
  }
#define MICRO_MMA_WORK_ONE(iter, type, peel) \
  if (unroll_factor > iter) { \
    pgerMMA<Packet, type, false>(&accZero##iter, rhsV##peel, lhsV##iter); \
  }
#define MICRO_MMA_TYPE_PEEL(func, func2, type, peel) \
  if (PEEL_MMA > peel) { \
    Packet lhsV0, lhsV1, lhsV2, lhsV3, lhsV4, lhsV5, lhsV6, lhsV7; \
    ploadRhsMMA<Scalar, type>(rhs_ptr + (accRows * peel), rhsV##peel); \
    MICRO_MMA_UNROLL(func2); \
    func(0,type,peel) func(1,type,peel) func(2,type,peel) func(3,type,peel) \
    func(4,type,peel) func(5,type,peel) func(6,type,peel) func(7,type,peel) \
  } else { \
    EIGEN_UNUSED_VARIABLE(rhsV##peel); \
  }
#define MICRO_MMA_UNROLL_TYPE_PEEL(func, func2, type) \
  type rhsV0, rhsV1, rhsV2, rhsV3, rhsV4, rhsV5, rhsV6, rhsV7, rhsV8, rhsV9; \
  MICRO_MMA_TYPE_PEEL(func,func2,type,0); MICRO_MMA_TYPE_PEEL(func,func2,type,1); \
  MICRO_MMA_TYPE_PEEL(func,func2,type,2); MICRO_MMA_TYPE_PEEL(func,func2,type,3); \
  MICRO_MMA_TYPE_PEEL(func,func2,type,4); MICRO_MMA_TYPE_PEEL(func,func2,type,5); \
  MICRO_MMA_TYPE_PEEL(func,func2,type,6); MICRO_MMA_TYPE_PEEL(func,func2,type,7); \
  MICRO_MMA_TYPE_PEEL(func,func2,type,8); MICRO_MMA_TYPE_PEEL(func,func2,type,9);
#define MICRO_MMA_UNROLL_TYPE_ONE(func, func2, type) \
  type rhsV0; \
  MICRO_MMA_TYPE_PEEL(func,func2,type,0);
#define MICRO_MMA_ONE_PEEL \
  if (sizeof(Scalar) == sizeof(float)) { \
    MICRO_MMA_UNROLL_TYPE_PEEL(MICRO_MMA_WORK_ONE, MICRO_MMA_LOAD_ONE, RhsPacket); \
  } else { \
    MICRO_MMA_UNROLL_TYPE_PEEL(MICRO_MMA_WORK_ONE, MICRO_MMA_LOAD_ONE, __vector_pair); \
  } \
  rhs_ptr += (accRows * PEEL_MMA);
#define MICRO_MMA_ONE \
  if (sizeof(Scalar) == sizeof(float)) { \
    MICRO_MMA_UNROLL_TYPE_ONE(MICRO_MMA_WORK_ONE, MICRO_MMA_LOAD_ONE, RhsPacket); \
  } else { \
    MICRO_MMA_UNROLL_TYPE_ONE(MICRO_MMA_WORK_ONE, MICRO_MMA_LOAD_ONE, __vector_pair); \
  } \
  rhs_ptr += accRows;
#define MICRO_MMA_DST_PTR_ONE(iter) \
  if (unroll_factor > iter) { \
    bsetzeroMMA<Scalar, Packet>(&accZero##iter); \
  } else { \
    EIGEN_UNUSED_VARIABLE(accZero##iter); \
  }
#define MICRO_MMA_DST_PTR MICRO_MMA_UNROLL(MICRO_MMA_DST_PTR_ONE)
#define MICRO_MMA_SRC_PTR_ONE(iter) \
  if (unroll_factor > iter) { \
    lhs_ptr##iter = lhs_base + ( (row/accCols) + iter )*strideA*accCols + accCols*offsetA; \
  } else { \
    EIGEN_UNUSED_VARIABLE(lhs_ptr##iter); \
  }
#define MICRO_MMA_SRC_PTR MICRO_MMA_UNROLL(MICRO_MMA_SRC_PTR_ONE)
#define MICRO_MMA_PREFETCH_ONE(iter) \
  if (unroll_factor > iter) { \
    EIGEN_POWER_PREFETCH(lhs_ptr##iter); \
  }
#define MICRO_MMA_PREFETCH MICRO_MMA_UNROLL(MICRO_MMA_PREFETCH_ONE)
#define MICRO_MMA_STORE_ONE(iter) \
  if (unroll_factor > iter) { \
    storeAccumulator<DataMapper, Index, Packet, accCols>(row + iter*accCols, col, res, pAlpha, &accZero##iter); \
  }
#define MICRO_MMA_STORE MICRO_MMA_UNROLL(MICRO_MMA_STORE_ONE)
template<int unroll_factor, typename Scalar, typename Packet, typename RhsPacket,
         typename DataMapper, typename Index, const Index accRows, const Index accCols>
EIGEN_STRONG_INLINE void gemm_unrolled_MMA_iteration(const DataMapper& res,
    const Scalar* lhs_base, const Scalar* rhs_base, Index depth, Index strideA,
    Index offsetA, Index& row, Index col, const Packet& pAlpha)
{
  // ... (pointer setup, accumulator zeroing, and the PEEL_MMA-peeled main loop
  //      over k are elided in this excerpt)
  for(; k < depth; k++)
  {
    MICRO_MMA_ONE
  }
  // ... (store and row advance elided)
}
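// Sketch of how the macros compose inside this micro-kernel (the full body is
// elided in this excerpt; this is the expected shape, not verbatim code):
//
//   MICRO_MMA_SRC_PTR   // per-iteration lhs pointers
//   MICRO_MMA_DST_PTR   // zero the accumulators
//   Index k = 0;
//   for(; k + PEEL_MMA <= depth; k += PEEL_MMA) {
//     EIGEN_POWER_PREFETCH(rhs_ptr);
//     MICRO_MMA_PREFETCH
//     MICRO_MMA_ONE_PEEL  // PEEL_MMA depth steps per trip
//   }
//   for(; k < depth; k++) { MICRO_MMA_ONE }  // the remainder loop above
//   MICRO_MMA_STORE       // alpha-scale and write the tiles back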
template<typename Scalar, typename Index, typename Packet, typename RhsPacket,
         typename DataMapper, const Index accRows, const Index accCols>
void gemmMMA(const DataMapper& res, const Scalar* blockA, const Scalar* blockB,
             Index rows, Index depth, Index cols, Scalar alpha,
             Index strideA, Index strideB, Index offsetA, Index offsetB)
{
  // ... (remaining-row/column bookkeeping, broadcast of alpha into pAlpha, and
  //      the outer loop over full accRows-wide column panels are elided)
#define MAX_MMA_UNROLL 7
    while(row + MAX_MMA_UNROLL*accCols <= rows) {
      gemm_unrolled_MMA_iteration<MAX_MMA_UNROLL, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);
    }
    // Dispatch the remaining (< MAX_MMA_UNROLL) full row blocks to
    // progressively smaller unrolled kernels.
    switch( (rows - row) / accCols ) {
#if MAX_MMA_UNROLL > 7
      case 7:
        gemm_unrolled_MMA_iteration<7, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);
        break;
#endif
#if MAX_MMA_UNROLL > 6
      case 6:
        gemm_unrolled_MMA_iteration<6, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);
        break;
#endif
#if MAX_MMA_UNROLL > 5
      case 5:
        gemm_unrolled_MMA_iteration<5, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);
        break;
#endif
#if MAX_MMA_UNROLL > 4
      case 4:
        gemm_unrolled_MMA_iteration<4, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);
        break;
#endif
#if MAX_MMA_UNROLL > 3
      case 3:
        gemm_unrolled_MMA_iteration<3, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);
        break;
#endif
#if MAX_MMA_UNROLL > 2
      case 2:
        gemm_unrolled_MMA_iteration<2, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);
        break;
#endif
#if MAX_MMA_UNROLL > 1
      case 1:
        gemm_unrolled_MMA_iteration<1, Scalar, Packet, RhsPacket, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, pAlpha);
        break;
#endif
      default:
        break;
    }
#undef MAX_MMA_UNROLL
    if(remaining_rows > 0)
    {
      gemm_extra_row<Scalar, Packet, DataMapper, Index, accRows, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, rows, cols, remaining_rows, pAlpha, pMask);
    }
  // ... (the loop handling the remaining (cols % accRows) columns is elided;
  //      within it, each column runs the two kernels below)
      gemm_unrolled_col<Scalar, Packet, DataMapper, Index, accCols>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, rows, col, remaining_cols, pAlpha);
      if(remaining_rows > 0)
      {
        gemm_extra_col<Scalar, Packet, DataMapper, Index, accRows>(res, lhs_base, rhs_base, depth, strideA, offsetA, row, col, remaining_rows, remaining_cols, pAlpha);
      }
  // ...
}
#define accColsC (accCols / 2)
#define advanceRows ((LhsIsReal) ? 1 : 2)
#define advanceCols ((RhsIsReal) ? 1 : 2)

#define PEEL_COMPLEX_MMA 7
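// Complex blocks are packed as a plane of real parts followed by a plane of
// imaginary parts, so a complex operand advances through packed storage at
// twice the real stride: advanceRows/advanceCols select that factor, and
// accColsC halves the column count because each complex value occupies two
// scalars. (MICRO_COMPLEX_MMA_SRC_PTR_ONE below locates the imaginary plane
// at lhs_ptr_real + accCols*strideA.)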
#define MICRO_COMPLEX_MMA_UNROLL(func) \
  func(0) func(1) func(2) func(3) func(4)
#define MICRO_COMPLEX_MMA_LOAD_ONE(iter) \
  if (unroll_factor > iter) { \
    lhsV##iter = ploadLhs<Scalar, Packet>(lhs_ptr_real##iter); \
    lhs_ptr_real##iter += accCols; \
    if(!LhsIsReal) { \
      lhsVi##iter = ploadLhs<Scalar, Packet>(lhs_ptr_imag##iter); \
      lhs_ptr_imag##iter += accCols; \
    } else { \
      EIGEN_UNUSED_VARIABLE(lhsVi##iter); \
    } \
  } else { \
    EIGEN_UNUSED_VARIABLE(lhsV##iter); \
    EIGEN_UNUSED_VARIABLE(lhsVi##iter); \
  }
#define MICRO_COMPLEX_MMA_WORK_ONE(iter, type, peel) \
  if (unroll_factor > iter) { \
    pgercMMA<Scalar, Packet, type, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal##iter, &accImag##iter, lhsV##iter, lhsVi##iter, rhsV##peel, rhsVi##peel); \
  }
#define MICRO_COMPLEX_MMA_TYPE_PEEL(func, func2, type, peel) \
  if (PEEL_COMPLEX_MMA > peel) { \
    Packet lhsV0, lhsV1, lhsV2, lhsV3, lhsV4; \
    Packet lhsVi0, lhsVi1, lhsVi2, lhsVi3, lhsVi4; \
    ploadRhsMMA<Scalar, type>(rhs_ptr_real + (accRows * peel), rhsV##peel); \
    if(!RhsIsReal) { \
      ploadRhsMMA<Scalar, type>(rhs_ptr_imag + (accRows * peel), rhsVi##peel); \
    } else { \
      EIGEN_UNUSED_VARIABLE(rhsVi##peel); \
    } \
    MICRO_COMPLEX_MMA_UNROLL(func2); \
    func(0,type,peel) func(1,type,peel) func(2,type,peel) func(3,type,peel) func(4,type,peel) \
  } else { \
    EIGEN_UNUSED_VARIABLE(rhsV##peel); \
    EIGEN_UNUSED_VARIABLE(rhsVi##peel); \
  }
#define MICRO_COMPLEX_MMA_UNROLL_TYPE_PEEL(func, func2, type) \
  type rhsV0, rhsV1, rhsV2, rhsV3, rhsV4, rhsV5, rhsV6, rhsV7, rhsV8, rhsV9; \
  type rhsVi0, rhsVi1, rhsVi2, rhsVi3, rhsVi4, rhsVi5, rhsVi6, rhsVi7, rhsVi8, rhsVi9; \
  MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,0); MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,1); \
  MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,2); MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,3); \
  MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,4); MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,5); \
  MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,6); MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,7); \
  MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,8); MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,9);
#define MICRO_COMPLEX_MMA_UNROLL_TYPE_ONE(func, func2, type) \
  type rhsV0, rhsVi0; \
  MICRO_COMPLEX_MMA_TYPE_PEEL(func,func2,type,0);
#define MICRO_COMPLEX_MMA_ONE_PEEL \
  if (sizeof(Scalar) == sizeof(float)) { \
    MICRO_COMPLEX_MMA_UNROLL_TYPE_PEEL(MICRO_COMPLEX_MMA_WORK_ONE, MICRO_COMPLEX_MMA_LOAD_ONE, RhsPacket); \
  } else { \
    MICRO_COMPLEX_MMA_UNROLL_TYPE_PEEL(MICRO_COMPLEX_MMA_WORK_ONE, MICRO_COMPLEX_MMA_LOAD_ONE, __vector_pair); \
  } \
  rhs_ptr_real += (accRows * PEEL_COMPLEX_MMA); \
  if(!RhsIsReal) rhs_ptr_imag += (accRows * PEEL_COMPLEX_MMA);
#define MICRO_COMPLEX_MMA_ONE \
  if (sizeof(Scalar) == sizeof(float)) { \
    MICRO_COMPLEX_MMA_UNROLL_TYPE_ONE(MICRO_COMPLEX_MMA_WORK_ONE, MICRO_COMPLEX_MMA_LOAD_ONE, RhsPacket); \
  } else { \
    MICRO_COMPLEX_MMA_UNROLL_TYPE_ONE(MICRO_COMPLEX_MMA_WORK_ONE, MICRO_COMPLEX_MMA_LOAD_ONE, __vector_pair); \
  } \
  rhs_ptr_real += accRows; \
  if(!RhsIsReal) rhs_ptr_imag += accRows;
#define MICRO_COMPLEX_MMA_DST_PTR_ONE(iter) \
  if (unroll_factor > iter) { \
    bsetzeroMMA<Scalar, Packet>(&accReal##iter); \
    bsetzeroMMA<Scalar, Packet>(&accImag##iter); \
  } else { \
    EIGEN_UNUSED_VARIABLE(accReal##iter); \
    EIGEN_UNUSED_VARIABLE(accImag##iter); \
  }
#define MICRO_COMPLEX_MMA_DST_PTR MICRO_COMPLEX_MMA_UNROLL(MICRO_COMPLEX_MMA_DST_PTR_ONE)
#define MICRO_COMPLEX_MMA_SRC_PTR_ONE(iter) \
  if (unroll_factor > iter) { \
    lhs_ptr_real##iter = lhs_base + ( ((advanceRows*row)/accCols) + iter*advanceRows )*strideA*accCols + accCols*offsetA; \
    if(!LhsIsReal) { \
      lhs_ptr_imag##iter = lhs_ptr_real##iter + accCols*strideA; \
    } else { \
      EIGEN_UNUSED_VARIABLE(lhs_ptr_imag##iter); \
    } \
  } else { \
    EIGEN_UNUSED_VARIABLE(lhs_ptr_real##iter); \
    EIGEN_UNUSED_VARIABLE(lhs_ptr_imag##iter); \
  }
#define MICRO_COMPLEX_MMA_SRC_PTR MICRO_COMPLEX_MMA_UNROLL(MICRO_COMPLEX_MMA_SRC_PTR_ONE)
#define MICRO_COMPLEX_MMA_PREFETCH_ONE(iter) \
  if (unroll_factor > iter) { \
    EIGEN_POWER_PREFETCH(lhs_ptr_real##iter); \
    if(!LhsIsReal) { \
      EIGEN_POWER_PREFETCH(lhs_ptr_imag##iter); \
    } \
  }
#define MICRO_COMPLEX_MMA_PREFETCH MICRO_COMPLEX_MMA_UNROLL(MICRO_COMPLEX_MMA_PREFETCH_ONE)
#define MICRO_COMPLEX_MMA_STORE_ONE(iter) \
  if (unroll_factor > iter) { \
    storeComplexAccumulator<DataMapper, Index, Packet, Packetc, accColsC, 0>(row + iter*accCols, col, res, pAlphaReal, pAlphaImag, &accReal##iter, &accImag##iter); \
  }
#define MICRO_COMPLEX_MMA_STORE MICRO_COMPLEX_MMA_UNROLL(MICRO_COMPLEX_MMA_STORE_ONE)
template<int unroll_factor, typename Scalar, typename Packet, typename Packetc,
         typename RhsPacket, typename DataMapper, typename Index,
         const Index accRows, const Index accCols,
         bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
EIGEN_STRONG_INLINE void gemm_complex_unrolled_MMA_iteration(const DataMapper& res,
    const Scalar* lhs_base, const Scalar* rhs_base, Index depth, Index strideA,
    Index offsetA, Index strideB, Index& row, Index col,
    const Packet& pAlphaReal, const Packet& pAlphaImag)
{
  // ... (pointer setup, accumulator zeroing, and the PEEL_COMPLEX_MMA-peeled
  //      main loop over k are elided in this excerpt)
  for(; k < depth; k++)
  {
    MICRO_COMPLEX_MMA_ONE
  }
  // ... (MICRO_COMPLEX_MMA_STORE and row advance elided)
}
template<typename LhsScalar, typename RhsScalar, typename Scalarc, typename Scalar,
         typename Index, typename Packet, typename Packetc, typename RhsPacket,
         typename DataMapper, const Index accRows, const Index accCols,
         bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
void gemm_complexMMA(const DataMapper& res, const LhsScalar* blockAc,
                     const RhsScalar* blockBc, Index rows, Index depth, Index cols,
                     Scalarc alpha, Index strideA, Index strideB,
                     Index offsetA, Index offsetB)
{
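  // (Sketch; the setup is elided in this excerpt.) The complex driver mirrors
  // gemmMMA: alpha is split into pAlphaReal/pAlphaImag broadcast packets, the
  // complex blocks blockAc/blockBc are addressed through their underlying real
  // Scalar planes, and the unroll ladder below dispatches on
  // MAX_COMPLEX_MMA_UNROLL before handing leftovers to the extra kernels.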
#define MAX_COMPLEX_MMA_UNROLL 4
    while(row + MAX_COMPLEX_MMA_UNROLL*accCols <= rows) {
      gemm_complex_unrolled_MMA_iteration<MAX_COMPLEX_MMA_UNROLL, Scalar, Packet, Packetc, RhsPacket, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag);
    }
    switch( (rows - row) / accCols ) {
#if MAX_COMPLEX_MMA_UNROLL > 4
      case 4:
        gemm_complex_unrolled_MMA_iteration<4, Scalar, Packet, Packetc, RhsPacket, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag);
        break;
#endif
#if MAX_COMPLEX_MMA_UNROLL > 3
      case 3:
        gemm_complex_unrolled_MMA_iteration<3, Scalar, Packet, Packetc, RhsPacket, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag);
        break;
#endif
#if MAX_COMPLEX_MMA_UNROLL > 2
      case 2:
        gemm_complex_unrolled_MMA_iteration<2, Scalar, Packet, Packetc, RhsPacket, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag);
        break;
#endif
#if MAX_COMPLEX_MMA_UNROLL > 1
      case 1:
        gemm_complex_unrolled_MMA_iteration<1, Scalar, Packet, Packetc, RhsPacket, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, pAlphaReal, pAlphaImag);
        break;
#endif
      default:
        break;
    }
#undef MAX_COMPLEX_MMA_UNROLL
    if(remaining_rows > 0)
    {
      gemm_complex_extra_row<Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, rows, cols, remaining_rows, pAlphaReal, pAlphaImag, pMask);
    }
  // ... (the loop handling the remaining (cols % accRows) columns is elided;
  //      within it, each column runs the two kernels below)
      gemm_complex_unrolled_col<Scalar, Packet, Packetc, DataMapper, Index, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, rows, col, remaining_cols, pAlphaReal, pAlphaImag);
      if(remaining_rows > 0)
      {
        gemm_complex_extra_col<Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(res, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, col, remaining_rows, remaining_cols, pAlphaReal, pAlphaImag);
      }
  // ...
}
#pragma GCC reset_options

#endif // EIGEN_MATRIX_PRODUCT_MMA_ALTIVEC_H