11#ifndef EIGEN_MATRIX_PRODUCT_ALTIVEC_H
12#define EIGEN_MATRIX_PRODUCT_ALTIVEC_H
14#ifndef EIGEN_ALTIVEC_USE_CUSTOM_PACK
15#define EIGEN_ALTIVEC_USE_CUSTOM_PACK 1
22#if !defined(EIGEN_ALTIVEC_DISABLE_MMA) && !defined(EIGEN_ALTIVEC_MMA_ONLY)
24#define EIGEN_ALTIVEC_MMA_ONLY
26#define EIGEN_ALTIVEC_DISABLE_MMA
32#if __has_builtin(__builtin_mma_assemble_acc)
33 #define ALTIVEC_MMA_SUPPORT
37#if defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)
53template<
typename Scalar>
85const static Packet16uc p16uc_GETREAL32 = { 0, 1, 2, 3,
90const static Packet16uc p16uc_GETIMAG32 = { 4, 5, 6, 7,
// Byte-index table listing bytes 0-7 followed by bytes 16-23.
// NOTE(review): presumably a permute control that gathers the real parts of
// two complex<double> values held in a pair of vectors -- confirm at the use site.
94const static Packet16uc p16uc_GETREAL64 = { 0, 1, 2, 3, 4, 5, 6, 7,
95 16, 17, 18, 19, 20, 21, 22, 23};
// Byte-index table listing bytes 8-15 followed by bytes 24-31, i.e. exactly
// the indices that p16uc_GETREAL64 leaves out of the 0-31 range.
// NOTE(review): presumably the matching imaginary-part selector -- confirm.
98const static Packet16uc p16uc_GETIMAG64 = { 8, 9, 10, 11, 12, 13, 14, 15,
99 24, 25, 26, 27, 28, 29, 30, 31};
119template<
typename Scalar,
typename Index,
int StorageOrder>
122 std::complex<Scalar>
v;
138template<
typename Scalar,
typename Index,
int StorageOrder,
int N>
188template<
typename Scalar,
typename Index,
int StorageOrder>
239template<
typename Scalar,
typename Index,
int StorageOrder,
int N>
255 blockB[
ri + k] = rhs(
j+k,
i);
257 blockB[
ri + k] = rhs(
i,
j+k);
271 blockB[
ri] = rhs(
i, k);
273 blockB[
ri] = rhs(k,
i);
280template<
typename Scalar,
typename Index,
int StorageOrder>
297 blockA[
ri + k] = lhs(
j+k,
i);
299 blockA[
ri + k] = lhs(
i,
j+k);
313 blockA[
ri] = lhs(k,
i);
315 blockA[
ri] = lhs(
i, k);
322template<
typename Index,
int nr,
int StorageOrder>
331template<
typename Index,
int Pack1,
int Pack2_dummy,
int StorageOrder>
342template<
typename Index,
int nr,
int StorageOrder>
351template<
typename Index,
int Pack1,
int Pack2_dummy,
int StorageOrder>
361template<
typename Index,
int nr,
int StorageOrder>
370template<
typename Index,
int Pack1,
int Pack2_dummy,
int StorageOrder>
380template<
typename Index,
int nr,
int StorageOrder>
389template<
typename Index,
int Pack1,
int Pack2_dummy,
int StorageOrder>
409template<
typename Scalar,
typename Packet,
typename Index>
419template<
typename Scalar,
typename Packet,
typename Index>
428template<
typename Scalar,
typename Index,
typename DataMapper,
typename Packet,
typename PacketC,
int StorageOrder,
bool Conjugate,
bool PanelMode,
bool UseLhs>
571template<
typename Scalar,
typename Index,
typename DataMapper,
typename Packet,
int StorageOrder,
bool PanelMode,
bool UseLhs>
607 blockA[
ri+0] = lhs(
j+0,
i);
608 blockA[
ri+1] = lhs(
j+1,
i);
609 blockA[
ri+2] = lhs(
j+2,
i);
610 blockA[
ri+3] = lhs(
j+3,
i);
612 blockA[
ri+0] = lhs(
i,
j+0);
613 blockA[
ri+1] = lhs(
i,
j+1);
614 blockA[
ri+2] = lhs(
i,
j+2);
615 blockA[
ri+3] = lhs(
i,
j+3);
643 blockA[
ri] = lhs(k,
i);
645 blockA[
ri] = lhs(
i, k);
655template<
typename Index,
typename DataMapper,
int StorageOrder,
bool PanelMode>
691 blockA[
ri+0] = lhs(
j+0,
i);
692 blockA[
ri+1] = lhs(
j+1,
i);
713 blockA[
ri] = lhs(k,
i);
722template<
typename Index,
typename DataMapper,
int StorageOrder,
bool PanelMode>
769 blockB[
ri+0] = rhs(
i,
j+0);
770 blockB[
ri+1] = rhs(
i,
j+1);
774 blockB[
ri+0] = rhs(
i,
j+2);
775 blockB[
ri+1] = rhs(
i,
j+3);
800 blockB[
ri] = rhs(
i, k);
809template<
typename Index,
typename DataMapper,
typename Packet,
typename PacketC,
int StorageOrder,
bool Conjugate,
bool PanelMode>
817 double*
blockAt =
reinterpret_cast<double *
>(blockA);
922template<
typename Index,
typename DataMapper,
typename Packet,
typename PacketC,
int StorageOrder,
bool Conjugate,
bool PanelMode>
930 double*
blockBt =
reinterpret_cast<double *
>(blockB);
998template<
typename Packet,
bool NegativeAccumulate>
1015template<
typename Packet,
bool NegativeAccumulate>
1026template<
int N,
typename Scalar,
typename Packet,
bool NegativeAccumulate>
1034template<
typename Scalar,
typename Packet,
typename Index>
1047template<
int N,
typename Scalar,
typename Packet,
typename Index,
bool NegativeAccumulate>
1057template<
int N,
typename Packet,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
1076template<
int N,
typename Scalar,
typename Packet,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
1084 pgerc_common<N, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(
accReal,
accImag,
lhsV,
lhsVi,
rhsV,
rhsVi);
1087template<
typename Scalar,
typename Packet,
typename Index,
bool LhsIsReal>
1104template<
int N,
typename Scalar,
typename Packet,
typename Index,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
1110 pgerc_common<N, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(
accReal,
accImag,
lhsV,
lhsVi,
rhsV,
rhsVi);
1113template<
typename Scalar,
typename Packet>
1120template<
typename Scalar,
typename Packet>
1129template<
typename Scalar,
typename Packet>
1136template<
typename Packet>
1145template<
typename Packet>
1151template<
typename Packet>
1160template<
typename Packet>
1167template<
typename Packet,
int N>
1179template<
typename Packet>
1182 acc.packet[0] =
pand(acc.packet[0],
pMask);
1183 acc.packet[1] =
pand(acc.packet[1],
pMask);
1184 acc.packet[2] =
pand(acc.packet[2],
pMask);
1185 acc.packet[3] =
pand(acc.packet[3],
pMask);
1188template<
typename Packet>
1189EIGEN_ALWAYS_INLINE void bscalec(
PacketBlock<Packet,4>&
aReal,
PacketBlock<Packet,4>&
aImag,
const Packet&
bReal,
const Packet&
bImag,
PacketBlock<Packet,4>&
cReal,
PacketBlock<Packet,4>&
cImag,
const Packet&
pMask)
1198template<
typename DataMapper,
typename Packet,
typename Index, const Index accCols,
int N,
int StorageOrder>
1215template<
typename DataMapper,
typename Packet,
typename Index, const Index accCols,
int N,
int StorageOrder>
1239template<
typename DataMapper,
typename Packet,
typename Index, const Index accCols,
int N,
int StorageOrder>
// Lane masks for 4-lane integer packets: the first 1, 2 or 3 lanes hold -1
// and the remaining lanes hold 0.
1246const static Packet4i mask41 = { -1, 0, 0, 0 };
1247const static Packet4i mask42 = { -1, -1, 0, 0 };
1248const static Packet4i mask43 = { -1, -1, -1, 0 };
// Lane mask for 2-lane packets: first lane -1, second lane 0.
1250const static Packet2l mask21 = { -1, 0 };
1252template<
typename Packet>
1259 case 1:
return Packet(mask41);
1260 case 2:
return Packet(mask42);
1261 default:
return Packet(mask43);
1276template<
typename Packet>
1284template<
typename Packet>
1304template<
typename Scalar,
typename Packet,
typename Index>
1319template<
typename Scalar,
typename Packet,
typename DataMapper,
typename Index, const Index accRows>
1345 for (
int l = 0; l <
PEEL; l++) {
1353 for(; k <
depth; k++)
1368template<
typename Scalar,
typename Packet,
typename Index, const Index accRows>
1382template<
typename Scalar,
typename Packet,
typename DataMapper,
typename Index, const Index accRows, const Index accCols>
1410 for (
int l = 0; l <
PEEL; l++) {
1427 for(; k <
depth; k++)
// Expands `func(iter)` once for each of the eight iteration indices 0..7.
1447#define MICRO_UNROLL(func) \
1448 func(0) func(1) func(2) func(3) func(4) func(5) func(6) func(7)
// First expands `func2(iter)` for indices 0..7 through MICRO_UNROLL, then
// expands `func(iter, peel)` for the same eight indices with the given peel.
1450#define MICRO_UNROLL_WORK(func, func2, peel) \
1451 MICRO_UNROLL(func2); \
1452 func(0,peel) func(1,peel) func(2,peel) func(3,peel) \
1453 func(4,peel) func(5,peel) func(6,peel) func(7,peel)
1455#define MICRO_LOAD_ONE(iter) \
1456 if (unroll_factor > iter) { \
1457 lhsV##iter = ploadLhs<Scalar, Packet>(lhs_ptr##iter); \
1458 lhs_ptr##iter += accCols; \
1460 EIGEN_UNUSED_VARIABLE(lhsV##iter); \
1463#define MICRO_WORK_ONE(iter, peel) \
1464 if (unroll_factor > iter) { \
1465 pger_common<Packet, false>(&accZero##iter, lhsV##iter, rhsV##peel); \
1468#define MICRO_TYPE_PEEL4(func, func2, peel) \
1469 if (PEEL > peel) { \
1470 Packet lhsV0, lhsV1, lhsV2, lhsV3, lhsV4, lhsV5, lhsV6, lhsV7; \
1471 pbroadcast4<Packet>(rhs_ptr + (accRows * peel), rhsV##peel[0], rhsV##peel[1], rhsV##peel[2], rhsV##peel[3]); \
1472 MICRO_UNROLL_WORK(func, func2, peel) \
1474 EIGEN_UNUSED_VARIABLE(rhsV##peel); \
1477#define MICRO_TYPE_PEEL1(func, func2, peel) \
1478 if (PEEL > peel) { \
1479 Packet lhsV0, lhsV1, lhsV2, lhsV3, lhsV4, lhsV5, lhsV6, lhsV7; \
1480 rhsV##peel[0] = pset1<Packet>(rhs_ptr[remaining_cols * peel]); \
1481 MICRO_UNROLL_WORK(func, func2, peel) \
1483 EIGEN_UNUSED_VARIABLE(rhsV##peel); \
// Declares ten M-element Packet arrays (rhsV0 .. rhsV9) and then expands
// `func(func1, func2, peel)` for peel indices 0 through 9.
1486#define MICRO_UNROLL_TYPE_PEEL(M, func, func1, func2) \
1487 Packet rhsV0[M], rhsV1[M], rhsV2[M], rhsV3[M], rhsV4[M], rhsV5[M], rhsV6[M], rhsV7[M], rhsV8[M], rhsV9[M]; \
1488 func(func1,func2,0); func(func1,func2,1); \
1489 func(func1,func2,2); func(func1,func2,3); \
1490 func(func1,func2,4); func(func1,func2,5); \
1491 func(func1,func2,6); func(func1,func2,7); \
1492 func(func1,func2,8); func(func1,func2,9);
1494#define MICRO_UNROLL_TYPE_ONE(M, func, func1, func2) \
1496 func(func1,func2,0);
// Runs the peeled expansion with M = 4, using MICRO_TYPE_PEEL4 as the per-peel
// step, MICRO_WORK_ONE as the work macro and MICRO_LOAD_ONE as the load macro,
// then advances rhs_ptr by accRows * PEEL elements.
1498#define MICRO_ONE_PEEL4 \
1499 MICRO_UNROLL_TYPE_PEEL(4, MICRO_TYPE_PEEL4, MICRO_WORK_ONE, MICRO_LOAD_ONE); \
1500 rhs_ptr += (accRows * PEEL);
1503 MICRO_UNROLL_TYPE_ONE(4, MICRO_TYPE_PEEL4, MICRO_WORK_ONE, MICRO_LOAD_ONE); \
// Runs the peeled expansion with M = 1, using MICRO_TYPE_PEEL1 as the per-peel
// step, MICRO_WORK_ONE as the work macro and MICRO_LOAD_ONE as the load macro,
// then advances rhs_ptr by remaining_cols * PEEL elements.
1506#define MICRO_ONE_PEEL1 \
1507 MICRO_UNROLL_TYPE_PEEL(1, MICRO_TYPE_PEEL1, MICRO_WORK_ONE, MICRO_LOAD_ONE); \
1508 rhs_ptr += (remaining_cols * PEEL);
1511 MICRO_UNROLL_TYPE_ONE(1, MICRO_TYPE_PEEL1, MICRO_WORK_ONE, MICRO_LOAD_ONE); \
1512 rhs_ptr += remaining_cols;
1514#define MICRO_DST_PTR_ONE(iter) \
1515 if (unroll_factor > iter) { \
1516 bsetzero<Scalar, Packet>(accZero##iter); \
1518 EIGEN_UNUSED_VARIABLE(accZero##iter); \
// Expands MICRO_DST_PTR_ONE for iteration indices 0..7; for each index below
// unroll_factor that macro zeroes the matching accZero<iter> accumulator.
1521#define MICRO_DST_PTR MICRO_UNROLL(MICRO_DST_PTR_ONE)
1523#define MICRO_SRC_PTR_ONE(iter) \
1524 if (unroll_factor > iter) { \
1525 lhs_ptr##iter = lhs_base + ( (row/accCols) + iter )*strideA*accCols + accCols*offsetA; \
1527 EIGEN_UNUSED_VARIABLE(lhs_ptr##iter); \
// Expands MICRO_SRC_PTR_ONE for iteration indices 0..7; for each index below
// unroll_factor that macro derives lhs_ptr<iter> from lhs_base, row, strideA
// and offsetA.
1530#define MICRO_SRC_PTR MICRO_UNROLL(MICRO_SRC_PTR_ONE)
1532#define MICRO_PREFETCH_ONE(iter) \
1533 if (unroll_factor > iter) { \
1534 EIGEN_POWER_PREFETCH(lhs_ptr##iter); \
// Expands MICRO_PREFETCH_ONE for iteration indices 0..7; for each index below
// unroll_factor that macro issues EIGEN_POWER_PREFETCH on lhs_ptr<iter>.
1537#define MICRO_PREFETCH MICRO_UNROLL(MICRO_PREFETCH_ONE)
1539#define MICRO_STORE_ONE(iter) \
1540 if (unroll_factor > iter) { \
1541 acc.packet[0] = res.template loadPacket<Packet>(row + iter*accCols, col + 0); \
1542 acc.packet[1] = res.template loadPacket<Packet>(row + iter*accCols, col + 1); \
1543 acc.packet[2] = res.template loadPacket<Packet>(row + iter*accCols, col + 2); \
1544 acc.packet[3] = res.template loadPacket<Packet>(row + iter*accCols, col + 3); \
1545 bscale<Packet>(acc, accZero##iter, pAlpha); \
1546 res.template storePacketBlock<Packet,4>(row + iter*accCols, col, acc); \
// Expands MICRO_STORE_ONE for iteration indices 0..7; for each index below
// unroll_factor that macro loads four result packets (columns col..col+3),
// calls bscale with accZero<iter> and pAlpha, and stores the 4-packet block.
1549#define MICRO_STORE MICRO_UNROLL(MICRO_STORE_ONE)
1551#define MICRO_COL_STORE_ONE(iter) \
1552 if (unroll_factor > iter) { \
1553 acc.packet[0] = res.template loadPacket<Packet>(row + iter*accCols, col + 0); \
1554 bscale<Packet>(acc, accZero##iter, pAlpha); \
1555 res.template storePacketBlock<Packet,1>(row + iter*accCols, col, acc); \
// Expands MICRO_COL_STORE_ONE for iteration indices 0..7; for each index below
// unroll_factor that macro loads one result packet at column col, calls bscale
// with accZero<iter> and pAlpha, and stores the 1-packet block.
1558#define MICRO_COL_STORE MICRO_UNROLL(MICRO_COL_STORE_ONE)
1560template<
int unroll_factor,
typename Scalar,
typename Packet,
typename DataMapper,
typename Index, const Index accRows, const Index accCols>
1587 for(; k <
depth; k++)
1596template<
int unroll_factor,
typename Scalar,
typename Packet,
typename DataMapper,
typename Index, const Index accCols>
1624 for(; k <
depth; k++)
1633template<
typename Scalar,
typename Packet,
typename DataMapper,
typename Index, const Index accCols>
1649 gemm_unrolled_col_iteration<MAX_UNROLL, Scalar, Packet, DataMapper, Index, accCols>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
row,
col,
remaining_cols,
pAlpha);
1654 gemm_unrolled_col_iteration<7, Scalar, Packet, DataMapper, Index, accCols>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
row,
col,
remaining_cols,
pAlpha);
1659 gemm_unrolled_col_iteration<6, Scalar, Packet, DataMapper, Index, accCols>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
row,
col,
remaining_cols,
pAlpha);
1664 gemm_unrolled_col_iteration<5, Scalar, Packet, DataMapper, Index, accCols>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
row,
col,
remaining_cols,
pAlpha);
1669 gemm_unrolled_col_iteration<4, Scalar, Packet, DataMapper, Index, accCols>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
row,
col,
remaining_cols,
pAlpha);
1674 gemm_unrolled_col_iteration<3, Scalar, Packet, DataMapper, Index, accCols>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
row,
col,
remaining_cols,
pAlpha);
1679 gemm_unrolled_col_iteration<2, Scalar, Packet, DataMapper, Index, accCols>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
row,
col,
remaining_cols,
pAlpha);
1684 gemm_unrolled_col_iteration<1, Scalar, Packet, DataMapper, Index, accCols>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
row,
col,
remaining_cols,
pAlpha);
1696template<
typename Scalar,
typename Index,
typename Packet,
typename RhsPacket,
typename DataMapper, const Index accRows, const Index accCols>
1697EIGEN_STRONG_INLINE void gemm(
const DataMapper&
res,
const Scalar* blockA,
const Scalar* blockB,
Index rows,
Index depth,
Index cols,
Scalar alpha,
Index strideA,
Index strideB,
Index offsetA,
Index offsetB)
1717 gemm_unrolled_iteration<MAX_UNROLL, Scalar, Packet, DataMapper, Index, accRows, accCols>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
row,
col,
pAlpha);
1722 gemm_unrolled_iteration<7, Scalar, Packet, DataMapper, Index, accRows, accCols>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
row,
col,
pAlpha);
1727 gemm_unrolled_iteration<6, Scalar, Packet, DataMapper, Index, accRows, accCols>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
row,
col,
pAlpha);
1732 gemm_unrolled_iteration<5, Scalar, Packet, DataMapper, Index, accRows, accCols>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
row,
col,
pAlpha);
1737 gemm_unrolled_iteration<4, Scalar, Packet, DataMapper, Index, accRows, accCols>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
row,
col,
pAlpha);
1742 gemm_unrolled_iteration<3, Scalar, Packet, DataMapper, Index, accRows, accCols>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
row,
col,
pAlpha);
1747 gemm_unrolled_iteration<2, Scalar, Packet, DataMapper, Index, accRows, accCols>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
row,
col,
pAlpha);
1752 gemm_unrolled_iteration<1, Scalar, Packet, DataMapper, Index, accRows, accCols>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
row,
col,
pAlpha);
1762 gemm_extra_row<Scalar, Packet, DataMapper, Index, accRows, accCols>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
row,
col,
rows,
cols,
remaining_rows,
pAlpha,
pMask);
1775 gemm_unrolled_col<Scalar, Packet, DataMapper, Index, accCols>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
row,
rows,
col,
remaining_cols,
pAlpha);
1779 gemm_extra_col<Scalar, Packet, DataMapper, Index, accRows>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
row,
col,
remaining_rows,
remaining_cols,
pAlpha);
// Half of accCols.
// NOTE(review): presumably the number of complex values per packet -- confirm.
1786#define accColsC (accCols / 2)
// Evaluates to 1 when LhsIsReal is true and to 2 otherwise.
1787#define advanceRows ((LhsIsReal) ? 1 : 2)
// Evaluates to 1 when RhsIsReal is true and to 2 otherwise.
1788#define advanceCols ((RhsIsReal) ? 1 : 2)
// Peel count for the complex kernels: the MICRO_COMPLEX_TYPE_PEEL* macros test
// `PEEL_COMPLEX > peel`, and the *_ONE_PEEL* macros advance the rhs pointers
// by a multiple of it.
1791#define PEEL_COMPLEX 3
1793template<
typename Scalar,
typename Packet,
typename Index, const Index accRows,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
1804 pgerc<1, Scalar, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&
accReal, &
accImag,
lhs_ptr_real,
lhs_ptr_imag,
rhsV,
rhsVi);
1813template<
typename Scalar,
typename Packet,
typename Packetc,
typename DataMapper,
typename Index, const Index accRows, const Index accCols,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
1857 MICRO_COMPLEX_EXTRA_COL<Scalar, Packet, Index, accRows, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(
lhs_ptr_real,
lhs_ptr_imag,
rhs_ptr_real,
rhs_ptr_imag,
accReal,
accImag,
remaining_rows,
remaining_cols);
1862 MICRO_COMPLEX_EXTRA_COL<Scalar, Packet, Index, accRows, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(
lhs_ptr_real,
lhs_ptr_imag,
rhs_ptr_real,
rhs_ptr_imag,
accReal,
accImag,
remaining_rows,
remaining_cols);
1865 for(; k <
depth; k++)
1870 pgerc<1, Scalar, Packet, Index, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&
accReal, &
accImag,
lhs_ptr_real,
lhs_ptr_imag,
rhsV,
rhsVi,
remaining_rows);
1892template<
typename Scalar,
typename Packet,
typename Index, const Index accRows,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
1902 pgerc<4, Scalar, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&
accReal, &
accImag,
lhs_ptr_real,
lhs_ptr_imag,
rhsV,
rhsVi);
1911template<
typename Scalar,
typename Packet,
typename Packetc,
typename DataMapper,
typename Index, const Index accRows, const Index accCols,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
1958 MICRO_COMPLEX_EXTRA_ROW<Scalar, Packet, Index, accRows, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(
lhs_ptr_real,
lhs_ptr_imag,
rhs_ptr_real,
rhs_ptr_imag,
accReal,
accImag,
remaining_rows);
1963 MICRO_COMPLEX_EXTRA_ROW<Scalar, Packet, Index, accRows, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(
lhs_ptr_real,
lhs_ptr_imag,
rhs_ptr_real,
rhs_ptr_imag,
accReal,
accImag,
remaining_rows);
1974 for(; k <
depth; k++)
1979 pgerc<4, Scalar, Packet, Index, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&
accReal, &
accImag,
lhs_ptr_real,
lhs_ptr_imag,
rhsV,
rhsVi,
remaining_rows);
// Expands `func(iter)` once for each of the five iteration indices 0..4.
2007#define MICRO_COMPLEX_UNROLL(func) \
2008 func(0) func(1) func(2) func(3) func(4)
// First expands `func2(iter)` for indices 0..4 through MICRO_COMPLEX_UNROLL,
// then expands `func(iter, peel)` for the same five indices with the given peel.
2010#define MICRO_COMPLEX_UNROLL_WORK(func, func2, peel) \
2011 MICRO_COMPLEX_UNROLL(func2); \
2012 func(0,peel) func(1,peel) func(2,peel) func(3,peel) func(4,peel)
2014#define MICRO_COMPLEX_LOAD_ONE(iter) \
2015 if (unroll_factor > iter) { \
2016 lhsV##iter = ploadLhs<Scalar, Packet>(lhs_ptr_real##iter); \
2017 lhs_ptr_real##iter += accCols; \
2019 lhsVi##iter = ploadLhs<Scalar, Packet>(lhs_ptr_imag##iter); \
2020 lhs_ptr_imag##iter += accCols; \
2022 EIGEN_UNUSED_VARIABLE(lhsVi##iter); \
2025 EIGEN_UNUSED_VARIABLE(lhsV##iter); \
2026 EIGEN_UNUSED_VARIABLE(lhsVi##iter); \
2029#define MICRO_COMPLEX_WORK_ONE4(iter, peel) \
2030 if (unroll_factor > iter) { \
2031 pgerc_common<4, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal##iter, &accImag##iter, lhsV##iter, lhsVi##iter, rhsV##peel, rhsVi##peel); \
2034#define MICRO_COMPLEX_WORK_ONE1(iter, peel) \
2035 if (unroll_factor > iter) { \
2036 pgerc_common<1, Packet, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal##iter, &accImag##iter, lhsV##iter, lhsVi##iter, rhsV##peel, rhsVi##peel); \
2039#define MICRO_COMPLEX_TYPE_PEEL4(func, func2, peel) \
2040 if (PEEL_COMPLEX > peel) { \
2041 Packet lhsV0, lhsV1, lhsV2, lhsV3, lhsV4; \
2042 Packet lhsVi0, lhsVi1, lhsVi2, lhsVi3, lhsVi4; \
2043 pbroadcast4_old<Packet>(rhs_ptr_real + (accRows * peel), rhsV##peel[0], rhsV##peel[1], rhsV##peel[2], rhsV##peel[3]); \
2045 pbroadcast4_old<Packet>(rhs_ptr_imag + (accRows * peel), rhsVi##peel[0], rhsVi##peel[1], rhsVi##peel[2], rhsVi##peel[3]); \
2047 EIGEN_UNUSED_VARIABLE(rhsVi##peel); \
2049 MICRO_COMPLEX_UNROLL_WORK(func, func2, peel) \
2051 EIGEN_UNUSED_VARIABLE(rhsV##peel); \
2052 EIGEN_UNUSED_VARIABLE(rhsVi##peel); \
2055#define MICRO_COMPLEX_TYPE_PEEL1(func, func2, peel) \
2056 if (PEEL_COMPLEX > peel) { \
2057 Packet lhsV0, lhsV1, lhsV2, lhsV3, lhsV4; \
2058 Packet lhsVi0, lhsVi1, lhsVi2, lhsVi3, lhsVi4; \
2059 rhsV##peel[0] = pset1<Packet>(rhs_ptr_real[remaining_cols * peel]); \
2061 rhsVi##peel[0] = pset1<Packet>(rhs_ptr_imag[remaining_cols * peel]); \
2063 EIGEN_UNUSED_VARIABLE(rhsVi##peel); \
2065 MICRO_COMPLEX_UNROLL_WORK(func, func2, peel) \
2067 EIGEN_UNUSED_VARIABLE(rhsV##peel); \
2068 EIGEN_UNUSED_VARIABLE(rhsVi##peel); \
// Declares ten M-element Packet arrays for the real rhs values (rhsV0..rhsV9)
// and ten for the imaginary rhs values (rhsVi0..rhsVi9), then expands
// `func(func1, func2, peel)` for peel indices 0 through 9.
2071#define MICRO_COMPLEX_UNROLL_TYPE_PEEL(M, func, func1, func2) \
2072 Packet rhsV0[M], rhsV1[M], rhsV2[M], rhsV3[M], rhsV4[M], rhsV5[M], rhsV6[M], rhsV7[M], rhsV8[M], rhsV9[M]; \
2073 Packet rhsVi0[M], rhsVi1[M], rhsVi2[M], rhsVi3[M], rhsVi4[M], rhsVi5[M], rhsVi6[M], rhsVi7[M], rhsVi8[M], rhsVi9[M]; \
2074 func(func1,func2,0); func(func1,func2,1); \
2075 func(func1,func2,2); func(func1,func2,3); \
2076 func(func1,func2,4); func(func1,func2,5); \
2077 func(func1,func2,6); func(func1,func2,7); \
2078 func(func1,func2,8); func(func1,func2,9);
// Single-step variant: declares only rhsV0 / rhsVi0 (M elements each) and
// expands `func(func1, func2, 0)` once.
2080#define MICRO_COMPLEX_UNROLL_TYPE_ONE(M, func, func1, func2) \
2081 Packet rhsV0[M], rhsVi0[M];\
2082 func(func1,func2,0);
// Peeled step with M = 4: runs MICRO_COMPLEX_UNROLL_TYPE_PEEL, then advances
// rhs_ptr_real by accRows * PEEL_COMPLEX; rhs_ptr_imag is advanced by the same
// amount only when RhsIsReal is false.
2084#define MICRO_COMPLEX_ONE_PEEL4 \
2085 MICRO_COMPLEX_UNROLL_TYPE_PEEL(4, MICRO_COMPLEX_TYPE_PEEL4, MICRO_COMPLEX_WORK_ONE4, MICRO_COMPLEX_LOAD_ONE); \
2086 rhs_ptr_real += (accRows * PEEL_COMPLEX); \
2087 if(!RhsIsReal) rhs_ptr_imag += (accRows * PEEL_COMPLEX);
// Single step with M = 4: runs MICRO_COMPLEX_UNROLL_TYPE_ONE, then advances
// rhs_ptr_real by accRows; rhs_ptr_imag follows only when RhsIsReal is false.
2089#define MICRO_COMPLEX_ONE4 \
2090 MICRO_COMPLEX_UNROLL_TYPE_ONE(4, MICRO_COMPLEX_TYPE_PEEL4, MICRO_COMPLEX_WORK_ONE4, MICRO_COMPLEX_LOAD_ONE); \
2091 rhs_ptr_real += accRows; \
2092 if(!RhsIsReal) rhs_ptr_imag += accRows;
// Peeled step with M = 1: runs MICRO_COMPLEX_UNROLL_TYPE_PEEL, then advances
// rhs_ptr_real by remaining_cols * PEEL_COMPLEX; rhs_ptr_imag follows only
// when RhsIsReal is false.
2094#define MICRO_COMPLEX_ONE_PEEL1 \
2095 MICRO_COMPLEX_UNROLL_TYPE_PEEL(1, MICRO_COMPLEX_TYPE_PEEL1, MICRO_COMPLEX_WORK_ONE1, MICRO_COMPLEX_LOAD_ONE); \
2096 rhs_ptr_real += (remaining_cols * PEEL_COMPLEX); \
2097 if(!RhsIsReal) rhs_ptr_imag += (remaining_cols * PEEL_COMPLEX);
// Single step with M = 1: runs MICRO_COMPLEX_UNROLL_TYPE_ONE, then advances
// rhs_ptr_real by remaining_cols; rhs_ptr_imag follows only when RhsIsReal is
// false.
2099#define MICRO_COMPLEX_ONE1 \
2100 MICRO_COMPLEX_UNROLL_TYPE_ONE(1, MICRO_COMPLEX_TYPE_PEEL1, MICRO_COMPLEX_WORK_ONE1, MICRO_COMPLEX_LOAD_ONE); \
2101 rhs_ptr_real += remaining_cols; \
2102 if(!RhsIsReal) rhs_ptr_imag += remaining_cols;
2104#define MICRO_COMPLEX_DST_PTR_ONE(iter) \
2105 if (unroll_factor > iter) { \
2106 bsetzero<Scalar, Packet>(accReal##iter); \
2107 bsetzero<Scalar, Packet>(accImag##iter); \
2109 EIGEN_UNUSED_VARIABLE(accReal##iter); \
2110 EIGEN_UNUSED_VARIABLE(accImag##iter); \
// Expands MICRO_COMPLEX_DST_PTR_ONE for iteration indices 0..4; for each index
// below unroll_factor that macro zeroes accReal<iter> and accImag<iter>.
2113#define MICRO_COMPLEX_DST_PTR MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_DST_PTR_ONE)
2115#define MICRO_COMPLEX_SRC_PTR_ONE(iter) \
2116 if (unroll_factor > iter) { \
2117 lhs_ptr_real##iter = lhs_base + ( ((advanceRows*row)/accCols) + iter*advanceRows )*strideA*accCols + accCols*offsetA; \
2119 lhs_ptr_imag##iter = lhs_ptr_real##iter + accCols*strideA; \
2121 EIGEN_UNUSED_VARIABLE(lhs_ptr_imag##iter); \
2124 EIGEN_UNUSED_VARIABLE(lhs_ptr_real##iter); \
2125 EIGEN_UNUSED_VARIABLE(lhs_ptr_imag##iter); \
// Expands MICRO_COMPLEX_SRC_PTR_ONE for iteration indices 0..4; that macro
// sets up the per-iteration lhs_ptr_real<iter> / lhs_ptr_imag<iter> pointers.
2128#define MICRO_COMPLEX_SRC_PTR MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_SRC_PTR_ONE)
2130#define MICRO_COMPLEX_PREFETCH_ONE(iter) \
2131 if (unroll_factor > iter) { \
2132 EIGEN_POWER_PREFETCH(lhs_ptr_real##iter); \
2134 EIGEN_POWER_PREFETCH(lhs_ptr_imag##iter); \
// Expands MICRO_COMPLEX_PREFETCH_ONE for iteration indices 0..4; that macro
// issues EIGEN_POWER_PREFETCH on the per-iteration lhs real/imaginary pointers.
2138#define MICRO_COMPLEX_PREFETCH MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_PREFETCH_ONE)
2140#define MICRO_COMPLEX_STORE_ONE(iter) \
2141 if (unroll_factor > iter) { \
2142 bload<DataMapper, Packetc, Index, accColsC, 0, ColMajor>(tRes, res, row + iter*accCols, col); \
2143 bscalec<Packet,4>(accReal##iter, accImag##iter, pAlphaReal, pAlphaImag, taccReal, taccImag); \
2144 bcouple<Packet, Packetc>(taccReal, taccImag, tRes, acc0, acc1); \
2145 res.template storePacketBlock<Packetc,4>(row + iter*accCols + 0, col, acc0); \
2146 res.template storePacketBlock<Packetc,4>(row + iter*accCols + accColsC, col, acc1); \
// Expands MICRO_COMPLEX_STORE_ONE for iteration indices 0..4; for each index
// below unroll_factor that macro bloads the current result tile, runs
// bscalec<Packet,4> on accReal<iter>/accImag<iter> with pAlphaReal/pAlphaImag,
// passes the result through bcouple and stores two 4-packet Packetc blocks.
2149#define MICRO_COMPLEX_STORE MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_STORE_ONE)
2151#define MICRO_COMPLEX_COL_STORE_ONE(iter) \
2152 if (unroll_factor > iter) { \
2153 bload<DataMapper, Packetc, Index, accColsC, 0, ColMajor>(tRes, res, row + iter*accCols, col); \
2154 bscalec<Packet,1>(accReal##iter, accImag##iter, pAlphaReal, pAlphaImag, taccReal, taccImag); \
2155 bcouple<Packet, Packetc>(taccReal, taccImag, tRes, acc0, acc1); \
2156 res.template storePacketBlock<Packetc,1>(row + iter*accCols + 0, col, acc0); \
2157 res.template storePacketBlock<Packetc,1>(row + iter*accCols + accColsC, col, acc1); \
// Expands MICRO_COMPLEX_COL_STORE_ONE for iteration indices 0..4; same sequence
// as the full store (bload, bscalec, bcouple, store) but with bscalec<Packet,1>
// and two 1-packet Packetc blocks.
2160#define MICRO_COMPLEX_COL_STORE MICRO_COMPLEX_UNROLL(MICRO_COMPLEX_COL_STORE_ONE)
2162template<
int unroll_factor,
typename Scalar,
typename Packet,
typename Packetc,
typename DataMapper,
typename Index, const Index accRows, const Index accCols,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
2206 for(; k <
depth; k++)
2215template<
int unroll_factor,
typename Scalar,
typename Packet,
typename Packetc,
typename DataMapper,
typename Index, const Index accCols,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
2260 for(; k <
depth; k++)
2269template<
typename Scalar,
typename Packet,
typename Packetc,
typename DataMapper,
typename Index, const Index accCols,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
2285#define MAX_COMPLEX_UNROLL 3
2287 gemm_complex_unrolled_col_iteration<MAX_COMPLEX_UNROLL, Scalar, Packet, Packetc, DataMapper, Index, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
strideB,
row,
col,
remaining_cols,
pAlphaReal,
pAlphaImag);
2290#if MAX_COMPLEX_UNROLL > 4
2292 gemm_complex_unrolled_col_iteration<4, Scalar, Packet, Packetc, DataMapper, Index, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
strideB,
row,
col,
remaining_cols,
pAlphaReal,
pAlphaImag);
2295#if MAX_COMPLEX_UNROLL > 3
2297 gemm_complex_unrolled_col_iteration<3, Scalar, Packet, Packetc, DataMapper, Index, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
strideB,
row,
col,
remaining_cols,
pAlphaReal,
pAlphaImag);
2300#if MAX_COMPLEX_UNROLL > 2
2302 gemm_complex_unrolled_col_iteration<2, Scalar, Packet, Packetc, DataMapper, Index, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
strideB,
row,
col,
remaining_cols,
pAlphaReal,
pAlphaImag);
2305#if MAX_COMPLEX_UNROLL > 1
2307 gemm_complex_unrolled_col_iteration<1, Scalar, Packet, Packetc, DataMapper, Index, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
strideB,
row,
col,
remaining_cols,
pAlphaReal,
pAlphaImag);
2313#undef MAX_COMPLEX_UNROLL
2316template<
typename LhsScalar,
typename RhsScalar,
typename Scalarc,
typename Scalar,
typename Index,
typename Packet,
typename Packetc,
typename RhsPacket,
typename DataMapper, const Index accRows, const Index accCols,
bool ConjugateLhs,
bool ConjugateRhs,
bool LhsIsReal,
bool RhsIsReal>
2317EIGEN_STRONG_INLINE void gemm_complex(
const DataMapper&
res,
const LhsScalar*
blockAc,
const RhsScalar*
blockBc,
Index rows,
Index depth,
Index cols,
Scalarc alpha,
Index strideA,
Index strideB,
Index offsetA,
Index offsetB)
2339#define MAX_COMPLEX_UNROLL 3
2341 gemm_complex_unrolled_iteration<MAX_COMPLEX_UNROLL, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
strideB,
row,
col,
pAlphaReal,
pAlphaImag);
2344#if MAX_COMPLEX_UNROLL > 4
2346 gemm_complex_unrolled_iteration<4, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
strideB,
row,
col,
pAlphaReal,
pAlphaImag);
2349#if MAX_COMPLEX_UNROLL > 3
2351 gemm_complex_unrolled_iteration<3, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
strideB,
row,
col,
pAlphaReal,
pAlphaImag);
2354#if MAX_COMPLEX_UNROLL > 2
2356 gemm_complex_unrolled_iteration<2, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
strideB,
row,
col,
pAlphaReal,
pAlphaImag);
2359#if MAX_COMPLEX_UNROLL > 1
2361 gemm_complex_unrolled_iteration<1, Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
strideB,
row,
col,
pAlphaReal,
pAlphaImag);
2367#undef MAX_COMPLEX_UNROLL
2371 gemm_complex_extra_row<Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
strideB,
row,
col,
rows,
cols,
remaining_rows,
pAlphaReal,
pAlphaImag,
pMask);
2384 gemm_complex_unrolled_col<Scalar, Packet, Packetc, DataMapper, Index, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
strideB,
row,
rows,
col,
remaining_cols,
pAlphaReal,
pAlphaImag);
2388 gemm_complex_extra_col<Scalar, Packet, Packetc, DataMapper, Index, accRows, accCols, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(
res,
lhs_base,
rhs_base,
depth,
strideA,
offsetA,
strideB,
row,
col,
remaining_rows,
remaining_cols,
pAlphaReal,
pAlphaImag);
2402template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2408template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2416template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2422template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2430#if EIGEN_ALTIVEC_USE_CUSTOM_PACK
2431template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2437template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2445template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2451template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2460template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2466template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2474template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2480template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2488template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2494template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2502template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2508template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2516#if EIGEN_ALTIVEC_USE_CUSTOM_PACK
2517template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2523template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2531template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2537template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2546template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2552template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2560template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2566template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2574template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2580template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2588template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2594template<
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2602template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2608template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2616template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2622template<
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2631template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2642template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2650 void (*
gemm_function)(
const DataMapper&,
const float*,
const float*,
Index,
Index,
Index,
float,
Index,
Index,
Index,
Index);
2652 #ifdef EIGEN_ALTIVEC_MMA_ONLY
2655 #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)
2665 gemm_function(
res, blockA, blockB,
rows,
depth,
cols,
alpha,
strideA,
strideB,
offsetA,
offsetB);
2668template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2680template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2682 ::operator()(
const DataMapper&
res,
const std::complex<float>* blockA,
const std::complex<float>* blockB,
2691 #ifdef EIGEN_ALTIVEC_MMA_ONLY
2693 gemm_function = &
Eigen::internal::gemm_complexMMA<std::complex<float>, std::complex<float>, std::complex<float>,
float,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
false,
false>;
2694 #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)
2696 gemm_function = &
Eigen::internal::gemm_complexMMA<std::complex<float>, std::complex<float>, std::complex<float>,
float,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
false,
false>;
2699 gemm_function = &
Eigen::internal::gemm_complex<std::complex<float>, std::complex<float>, std::complex<float>,
float,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
false,
false>;
2702 gemm_function = &
Eigen::internal::gemm_complex<std::complex<float>, std::complex<float>, std::complex<float>,
float,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
false,
false>;
2704 gemm_function(
res, blockA, blockB,
rows,
depth,
cols,
alpha,
strideA,
strideB,
offsetA,
offsetB);
2707template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2719template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2721 ::operator()(
const DataMapper&
res,
const float* blockA,
const std::complex<float>* blockB,
2729 #ifdef EIGEN_ALTIVEC_MMA_ONLY
2731 gemm_function = &
Eigen::internal::gemm_complexMMA<float, std::complex<float>, std::complex<float>,
float,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
true,
false>;
2732 #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)
2734 gemm_function = &
Eigen::internal::gemm_complexMMA<float, std::complex<float>, std::complex<float>,
float,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
true,
false>;
2737 gemm_function = &
Eigen::internal::gemm_complex<float, std::complex<float>, std::complex<float>,
float,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
true,
false>;
2740 gemm_function = &
Eigen::internal::gemm_complex<float, std::complex<float>, std::complex<float>,
float,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
true,
false>;
2742 gemm_function(
res, blockA, blockB,
rows,
depth,
cols,
alpha,
strideA,
strideB,
offsetA,
offsetB);
2745template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2757template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2759 ::operator()(
const DataMapper&
res,
const std::complex<float>* blockA,
const float* blockB,
2767 #ifdef EIGEN_ALTIVEC_MMA_ONLY
2769 gemm_function = &
Eigen::internal::gemm_complexMMA<std::complex<float>,
float, std::complex<float>,
float,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
false,
true>;
2770 #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)
2772 gemm_function = &
Eigen::internal::gemm_complexMMA<std::complex<float>,
float, std::complex<float>,
float,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
false,
true>;
2775 gemm_function = &
Eigen::internal::gemm_complex<std::complex<float>,
float, std::complex<float>,
float,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
false,
true>;
2778 gemm_function = &
Eigen::internal::gemm_complex<std::complex<float>,
float, std::complex<float>,
float,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
false,
true>;
2780 gemm_function(
res, blockA, blockB,
rows,
depth,
cols,
alpha,
strideA,
strideB,
offsetA,
offsetB);
2783template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2794template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2802 void (*
gemm_function)(
const DataMapper&,
const double*,
const double*,
Index,
Index,
Index,
double,
Index,
Index,
Index,
Index);
2804 #ifdef EIGEN_ALTIVEC_MMA_ONLY
2807 #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)
2817 gemm_function(
res, blockA, blockB,
rows,
depth,
cols,
alpha,
strideA,
strideB,
offsetA,
offsetB);
2820template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2827 void operator()(
const DataMapper&
res,
const std::complex<double>* blockA,
const std::complex<double>* blockB,
2832template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2834 ::operator()(
const DataMapper&
res,
const std::complex<double>* blockA,
const std::complex<double>* blockB,
2842 #ifdef EIGEN_ALTIVEC_MMA_ONLY
2844 gemm_function = &
Eigen::internal::gemm_complexMMA<std::complex<double>, std::complex<double>, std::complex<double>,
double,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
false,
false>;
2845 #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)
2847 gemm_function = &
Eigen::internal::gemm_complexMMA<std::complex<double>, std::complex<double>, std::complex<double>,
double,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
false,
false>;
2850 gemm_function = &
Eigen::internal::gemm_complex<std::complex<double>, std::complex<double>, std::complex<double>,
double,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
false,
false>;
2853 gemm_function = &
Eigen::internal::gemm_complex<std::complex<double>, std::complex<double>, std::complex<double>,
double,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
false,
false>;
2855 gemm_function(
res, blockA, blockB,
rows,
depth,
cols,
alpha,
strideA,
strideB,
offsetA,
offsetB);
2858template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2870template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2872 ::operator()(
const DataMapper&
res,
const std::complex<double>* blockA,
const double* blockB,
2880 #ifdef EIGEN_ALTIVEC_MMA_ONLY
2882 gemm_function = &
Eigen::internal::gemm_complexMMA<std::complex<double>,
double, std::complex<double>,
double,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
false,
true>;
2883 #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)
2885 gemm_function = &
Eigen::internal::gemm_complexMMA<std::complex<double>,
double, std::complex<double>,
double,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
false,
true>;
2888 gemm_function = &
Eigen::internal::gemm_complex<std::complex<double>,
double, std::complex<double>,
double,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
false,
true>;
2891 gemm_function = &
Eigen::internal::gemm_complex<std::complex<double>,
double, std::complex<double>,
double,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
false,
true>;
2893 gemm_function(
res, blockA, blockB,
rows,
depth,
cols,
alpha,
strideA,
strideB,
offsetA,
offsetB);
2896template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2908template<
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
2910 ::operator()(
const DataMapper&
res,
const double* blockA,
const std::complex<double>* blockB,
2918 #ifdef EIGEN_ALTIVEC_MMA_ONLY
2920 gemm_function = &
Eigen::internal::gemm_complexMMA<double, std::complex<double>, std::complex<double>,
double,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
true,
false>;
2921 #elif defined(ALTIVEC_MMA_SUPPORT) && !defined(EIGEN_ALTIVEC_DISABLE_MMA)
2923 gemm_function = &
Eigen::internal::gemm_complexMMA<double, std::complex<double>, std::complex<double>,
double,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
true,
false>;
2926 gemm_function = &
Eigen::internal::gemm_complex<double, std::complex<double>, std::complex<double>,
double,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
true,
false>;
2929 gemm_function = &
Eigen::internal::gemm_complex<double, std::complex<double>, std::complex<double>,
double,
Index,
Packet,
Packetc,
RhsPacket,
DataMapper,
accRows,
accCols,
ConjugateLhs,
ConjugateRhs,
true,
false>;
2931 gemm_function(
res, blockA, blockB,
rows,
depth,
cols,
alpha,
strideA,
strideB,
offsetA,
offsetB);
#define __UNPACK_TYPE__(PACKETNAME)
Definition PacketMath.h:74
ArrayXXi a
Definition Array_initializer_list_23_cxx11.cpp:1
Array< int, Dynamic, 1 > v
Definition Array_initializer_list_vector_cxx11.cpp:1
int i
Definition BiCGSTAB_step_by_step.cpp:9
internal::enable_if< internal::valid_indexed_view_overload< RowIndices, ColIndices >::value &&internal::traits< typenameEIGEN_INDEXED_VIEW_METHOD_TYPE< RowIndices, ColIndices >::type >::ReturnAsIndexedView, typenameEIGEN_INDEXED_VIEW_METHOD_TYPE< RowIndices, ColIndices >::type >::type operator()(const RowIndices &rowIndices, const ColIndices &colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST
Definition IndexedViewMethods.h:73
#define EIGEN_ALWAYS_INLINE
Definition Macros.h:932
#define EIGEN_UNUSED_VARIABLE(var)
Definition Macros.h:1076
#define EIGEN_STRONG_INLINE
Definition Macros.h:917
#define EIGEN_POWER_PREFETCH(p)
Definition MatrixProductCommon.h:5
#define MICRO_COMPLEX_DST_PTR
Definition MatrixProduct.h:2113
#define advanceCols
Definition MatrixProduct.h:1788
#define MICRO_ONE_PEEL4
Definition MatrixProduct.h:1498
#define MICRO_STORE
Definition MatrixProduct.h:1549
#define accColsC
Definition MatrixProduct.h:1786
#define MICRO_DST_PTR
Definition MatrixProduct.h:1521
#define MICRO_COMPLEX_COL_STORE
Definition MatrixProduct.h:2160
#define advanceRows
Definition MatrixProduct.h:1787
#define MICRO_ONE_PEEL1
Definition MatrixProduct.h:1506
#define MICRO_COMPLEX_ONE_PEEL4
Definition MatrixProduct.h:2084
#define MICRO_COMPLEX_PREFETCH
Definition MatrixProduct.h:2138
#define PEEL_COMPLEX
Definition MatrixProduct.h:1791
#define MICRO_ONE1
Definition MatrixProduct.h:1510
#define PEEL
Definition MatrixProduct.h:1302
#define MICRO_COMPLEX_ONE4
Definition MatrixProduct.h:2089
#define MICRO_PREFETCH
Definition MatrixProduct.h:1537
#define MICRO_ONE4
Definition MatrixProduct.h:1502
#define MICRO_COMPLEX_ONE_PEEL1
Definition MatrixProduct.h:2094
#define MICRO_COMPLEX_STORE
Definition MatrixProduct.h:2149
#define MICRO_COL_STORE
Definition MatrixProduct.h:1558
#define MICRO_SRC_PTR
Definition MatrixProduct.h:1530
#define MICRO_COMPLEX_SRC_PTR
Definition MatrixProduct.h:2128
#define MAX_COMPLEX_UNROLL
#define MICRO_COMPLEX_ONE1
Definition MatrixProduct.h:2099
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
Definition PartialRedux_count.cpp:3
m m block(1, 0, 2, 2)<< 4
int rows
Definition Tutorial_commainit_02.cpp:1
int cols
Definition Tutorial_commainit_02.cpp:1
Scalar Scalar int size
Definition benchVecAdd.cpp:17
SCALAR Scalar
Definition bench_gemm.cpp:46
Definition ForwardDeclarations.h:87
Definition BlasUtil.h:389
@ N
Definition constructor.cpp:23
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy y set format x g set format y g set format x2 g set format y2 g set format z g set angles radians set nogrid set key title set key left top Right noreverse box linetype linewidth samplen spacing width set nolabel set noarrow set nologscale set logscale x set set pointsize set encoding default set nopolar set noparametric set set set set surface set nocontour set clabel set mapping cartesian set nohidden3d set cntrparam order set cntrparam linear set cntrparam levels auto set cntrparam points set size set set xzeroaxis lt lw set x2zeroaxis lt lw set yzeroaxis lt lw set y2zeroaxis lt lw set tics in set ticslevel set tics set mxtics default set mytics default set mx2tics default set my2tics default set xtics border mirror norotate autofreq set ytics border mirror norotate autofreq set ztics border nomirror norotate autofreq set nox2tics set noy2tics set timestamp bottom norotate offset
Definition gnuplot_common_settings.hh:64
@ ColMajor
Definition Constants.h:319
@ RowMajor
Definition Constants.h:321
RealScalar alpha
Definition level1_cplx_impl.h:147
v2f64 Packet2d
Definition PacketMath.h:820
EIGEN_ALWAYS_INLINE void bscalec_common(PacketBlock< Packet, 4 > &acc, PacketBlock< Packet, 4 > &accZ, const Packet &pAlpha)
Definition MatrixProduct.h:1152
EIGEN_STRONG_INLINE void symm_pack_rhs_helper(Scalar *blockB, const Scalar *_rhs, Index rhsStride, Index rows, Index cols, Index k2)
Definition MatrixProduct.h:240
v2i64 Packet2l
Definition PacketMath.h:821
EIGEN_ALWAYS_INLINE void bscalec(PacketBlock< Packet, N > &aReal, PacketBlock< Packet, N > &aImag, const Packet &bReal, const Packet &bImag, PacketBlock< Packet, N > &cReal, PacketBlock< Packet, N > &cImag)
Definition MatrixProduct.h:1168
EIGEN_STRONG_INLINE void gemm(const DataMapper &res, const Scalar *blockA, const Scalar *blockB, Index rows, Index depth, Index cols, Scalar alpha, Index strideA, Index strideB, Index offsetA, Index offsetB)
Definition MatrixProduct.h:1697
__vector int Packet4i
Definition PacketMath.h:31
EIGEN_STRONG_INLINE void symm_pack_complex_lhs_helper(std::complex< Scalar > *blockA, const std::complex< Scalar > *_lhs, Index lhsStride, Index cols, Index rows)
Definition MatrixProduct.h:189
EIGEN_STRONG_INLINE void gemm_complex_unrolled_iteration(const DataMapper &res, const Scalar *lhs_base, const Scalar *rhs_base, Index depth, Index strideA, Index offsetA, Index strideB, Index &row, Index col, const Packet &pAlphaReal, const Packet &pAlphaImag)
Definition MatrixProduct.h:2163
EIGEN_ALWAYS_INLINE std::complex< Scalar > getAdjointVal(Index i, Index j, const_blas_data_mapper< std::complex< Scalar >, Index, StorageOrder > &dt)
Definition MatrixProduct.h:120
__vector unsigned char Packet16uc
Definition PacketMath.h:37
EIGEN_ALWAYS_INLINE void pgerc(PacketBlock< Packet, N > *accReal, PacketBlock< Packet, N > *accImag, const Scalar *lhs_ptr, const Scalar *lhs_ptr_imag, const Packet *rhsV, const Packet *rhsVi)
Definition MatrixProduct.h:1077
EIGEN_STRONG_INLINE void gemm_complex(const DataMapper &res, const LhsScalar *blockAc, const RhsScalar *blockBc, Index rows, Index depth, Index cols, Scalarc alpha, Index strideA, Index strideB, Index offsetA, Index offsetB)
Definition MatrixProduct.h:2317
EIGEN_STRONG_INLINE void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
Definition Complex.h:224
EIGEN_ALWAYS_INLINE void pger_common(PacketBlock< Packet, 4 > *acc, const Packet &lhsV, const Packet *rhsV)
Definition MatrixProduct.h:999
EIGEN_ALWAYS_INLINE void pgerc_common(PacketBlock< Packet, N > *accReal, PacketBlock< Packet, N > *accImag, const Packet &lhsV, const Packet &lhsVi, const Packet *rhsV, const Packet *rhsVi)
Definition MatrixProduct.h:1058
EIGEN_STRONG_INLINE void symm_pack_lhs_helper(Scalar *blockA, const Scalar *_lhs, Index lhsStride, Index cols, Index rows)
Definition MatrixProduct.h:281
EIGEN_STRONG_INLINE void gemm_unrolled_col_iteration(const DataMapper &res, const Scalar *lhs_base, const Scalar *rhs_base, Index depth, Index strideA, Index offsetA, Index &row, Index col, Index remaining_cols, const Packet &pAlpha)
Definition MatrixProduct.h:1597
EIGEN_STRONG_INLINE void gemm_complex_extra_col(const DataMapper &res, const Scalar *lhs_base, const Scalar *rhs_base, Index depth, Index strideA, Index offsetA, Index strideB, Index row, Index col, Index remaining_rows, Index remaining_cols, const Packet &pAlphaReal, const Packet &pAlphaImag)
Definition MatrixProduct.h:1814
EIGEN_ALWAYS_INLINE void pbroadcast4_old(const __UNPACK_TYPE__(Packet) *a, Packet &a0, Packet &a1, Packet &a2, Packet &a3)
Definition MatrixProduct.h:1285
EIGEN_STRONG_INLINE void gemm_complex_extra_row(const DataMapper &res, const Scalar *lhs_base, const Scalar *rhs_base, Index depth, Index strideA, Index offsetA, Index strideB, Index row, Index col, Index rows, Index cols, Index remaining_rows, const Packet &pAlphaReal, const Packet &pAlphaImag, const Packet &pMask)
Definition MatrixProduct.h:1912
EIGEN_ALWAYS_INLINE void MICRO_COMPLEX_EXTRA_ROW(const Scalar *&lhs_ptr_real, const Scalar *&lhs_ptr_imag, const Scalar *&rhs_ptr_real, const Scalar *&rhs_ptr_imag, PacketBlock< Packet, 4 > &accReal, PacketBlock< Packet, 4 > &accImag, Index remaining_rows)
Definition MatrixProduct.h:1893
EIGEN_ALWAYS_INLINE void loadPacketRemaining(const Scalar *lhs, Packet &lhsV, Index remaining_rows)
Definition MatrixProduct.h:1035
EIGEN_STRONG_INLINE void symm_pack_complex_rhs_helper(std::complex< Scalar > *blockB, const std::complex< Scalar > *_rhs, Index rhsStride, Index rows, Index cols, Index k2)
Definition MatrixProduct.h:139
EIGEN_ALWAYS_INLINE void MICRO_EXTRA_ROW(const Scalar *&lhs_ptr, const Scalar *&rhs_ptr, PacketBlock< Packet, 4 > &accZero, Index remaining_rows)
Definition MatrixProduct.h:1369
EIGEN_ALWAYS_INLINE void MICRO_EXTRA_COL(const Scalar *&lhs_ptr, const Scalar *&rhs_ptr, PacketBlock< Packet, 1 > &accZero, Index remaining_rows, Index remaining_cols)
Definition MatrixProduct.h:1305
EIGEN_STRONG_INLINE Packet2d pset1< Packet2d >(const double &from)
Definition PacketMath.h:872
EIGEN_STRONG_INLINE void gemm_extra_row(const DataMapper &res, const Scalar *lhs_base, const Scalar *rhs_base, Index depth, Index strideA, Index offsetA, Index row, Index col, Index rows, Index cols, Index remaining_rows, const Packet &pAlpha, const Packet &pMask)
Definition MatrixProduct.h:1383
EIGEN_ALWAYS_INLINE void MICRO_COMPLEX_EXTRA_COL(const Scalar *&lhs_ptr_real, const Scalar *&lhs_ptr_imag, const Scalar *&rhs_ptr_real, const Scalar *&rhs_ptr_imag, PacketBlock< Packet, 1 > &accReal, PacketBlock< Packet, 1 > &accImag, Index remaining_rows, Index remaining_cols)
Definition MatrixProduct.h:1794
EIGEN_ALWAYS_INLINE void band(PacketBlock< Packet, 4 > &acc, const Packet &pMask)
Definition MatrixProduct.h:1180
EIGEN_STRONG_INLINE void gemm_unrolled_col(const DataMapper &res, const Scalar *lhs_base, const Scalar *rhs_base, Index depth, Index strideA, Index offsetA, Index &row, Index rows, Index col, Index remaining_cols, const Packet &pAlpha)
Definition MatrixProduct.h:1634
EIGEN_STRONG_INLINE void pstore< double >(double *to, const Packet4d &from)
Definition PacketMath.h:623
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition PacketMath.h:827
EIGEN_STRONG_INLINE Packet2d pload< Packet2d >(const double *from)
Definition PacketMath.h:967
EIGEN_ALWAYS_INLINE void storeBlock(Scalar *to, PacketBlock< Packet, 4 > &block)
Definition MatrixProduct.h:410
EIGEN_ALWAYS_INLINE Packet2d bmask< Packet2d >(const int remaining_rows)
Definition MatrixProduct.h:1267
EIGEN_ALWAYS_INLINE Packet bmask(const int remaining_rows)
Definition MatrixProduct.h:1253
EIGEN_ALWAYS_INLINE void pbroadcast4_old< Packet2d >(const double *a, Packet2d &a0, Packet2d &a1, Packet2d &a2, Packet2d &a3)
Definition MatrixProduct.h:1291
EIGEN_STRONG_INLINE Packet2cf pload2(const std::complex< float > *from0, const std::complex< float > *from1)
Definition Complex.h:130
EIGEN_STRONG_INLINE Packet8h pand(const Packet8h &a, const Packet8h &b)
Definition PacketMath.h:1050
EIGEN_STRONG_INLINE void gemm_extra_col(const DataMapper &res, const Scalar *lhs_base, const Scalar *rhs_base, Index depth, Index strideA, Index offsetA, Index row, Index col, Index remaining_rows, Index remaining_cols, const Packet &pAlpha)
Definition MatrixProduct.h:1320
EIGEN_STRONG_INLINE void gemm_complex_unrolled_col(const DataMapper &res, const Scalar *lhs_base, const Scalar *rhs_base, Index depth, Index strideA, Index offsetA, Index strideB, Index &row, Index rows, Index col, Index remaining_cols, const Packet &pAlphaReal, const Packet &pAlphaImag)
Definition MatrixProduct.h:2270
EIGEN_ALWAYS_INLINE void bscale(PacketBlock< Packet, 4 > &acc, PacketBlock< Packet, 4 > &accZ, const Packet &pAlpha)
Definition MatrixProduct.h:1137
EIGEN_ALWAYS_INLINE Packet ploadLhs(const Scalar *lhs)
Definition MatrixProduct.h:1114
EIGEN_ALWAYS_INLINE void pger(PacketBlock< Packet, N > *acc, const Scalar *lhs, const Packet *rhsV)
Definition MatrixProduct.h:1027
EIGEN_STRONG_INLINE void gemm_unrolled_iteration(const DataMapper &res, const Scalar *lhs_base, const Scalar *rhs_base, Index depth, Index strideA, Index offsetA, Index &row, Index col, const Packet &pAlpha)
Definition MatrixProduct.h:1561
EIGEN_ALWAYS_INLINE void bload(PacketBlock< Packet, 4 > &acc, const DataMapper &res, Index row, Index col)
Definition MatrixProduct.h:1199
EIGEN_ALWAYS_INLINE void bsetzero(PacketBlock< Packet, 4 > &acc)
Definition MatrixProduct.h:1121
EIGEN_STRONG_INLINE void gemm_complex_unrolled_col_iteration(const DataMapper &res, const Scalar *lhs_base, const Scalar *rhs_base, Index depth, Index strideA, Index offsetA, Index strideB, Index &row, Index col, Index remaining_cols, const Packet &pAlphaReal, const Packet &pAlphaImag)
Definition MatrixProduct.h:2216
Namespace containing all symbols from the Eigen library.
Definition bench_norm.cpp:85
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition Meta.h:74
Definition BandTriangularSolver.h:13
EIGEN_STRONG_INLINE void operator()(std::complex< double > *blockB, const DataMapper &rhs, Index depth, Index cols, Index stride, Index offset)
Definition MatrixProduct.h:925
EIGEN_STRONG_INLINE void operator()(std::complex< double > *blockA, const DataMapper &lhs, Index depth, Index rows, Index stride, Index offset)
Definition MatrixProduct.h:812
Definition MatrixProduct.h:429
EIGEN_STRONG_INLINE void operator()(std::complex< Scalar > *blockA, const DataMapper &lhs, Index depth, Index rows, Index stride, Index offset)
Definition MatrixProduct.h:430
EIGEN_STRONG_INLINE void operator()(double *blockA, const DataMapper &lhs, Index depth, Index rows, Index stride, Index offset)
Definition MatrixProduct.h:658
EIGEN_STRONG_INLINE void operator()(double *blockB, const DataMapper &rhs, Index depth, Index cols, Index stride, Index offset)
Definition MatrixProduct.h:725
Definition MatrixProduct.h:572
EIGEN_STRONG_INLINE void operator()(Scalar *blockA, const DataMapper &lhs, Index depth, Index rows, Index stride, Index offset)
Definition MatrixProduct.h:573
quad_traits< double >::vectortype Packet
Definition MatrixProduct.h:2786
quad_traits< double >::rhstype RhsPacket
Definition MatrixProduct.h:2787
Packet1cd Packetc
Definition MatrixProduct.h:2900
quad_traits< double >::rhstype RhsPacket
Definition MatrixProduct.h:2901
quad_traits< double >::vectortype Packet
Definition MatrixProduct.h:2899
quad_traits< float >::rhstype RhsPacket
Definition MatrixProduct.h:2635
quad_traits< float >::vectortype Packet
Definition MatrixProduct.h:2634
Packet2cf Packetc
Definition MatrixProduct.h:2711
Packet4f RhsPacket
Definition MatrixProduct.h:2712
Packet4f Packet
Definition MatrixProduct.h:2710
quad_traits< double >::vectortype Packet
Definition MatrixProduct.h:2861
Packet1cd Packetc
Definition MatrixProduct.h:2862
quad_traits< double >::rhstype RhsPacket
Definition MatrixProduct.h:2863
quad_traits< double >::vectortype Packet
Definition MatrixProduct.h:2823
quad_traits< double >::rhstype RhsPacket
Definition MatrixProduct.h:2825
Packet1cd Packetc
Definition MatrixProduct.h:2824
Packet2cf Packetc
Definition MatrixProduct.h:2749
Packet4f Packet
Definition MatrixProduct.h:2748
Packet4f RhsPacket
Definition MatrixProduct.h:2750
Packet2cf Packetc
Definition MatrixProduct.h:2672
Packet4f Packet
Definition MatrixProduct.h:2671
Packet4f RhsPacket
Definition MatrixProduct.h:2673
Definition GeneralBlockPanelKernel.h:1058
EIGEN_DONT_INLINE void operator()(const DataMapper &res, const LhsScalar *blockA, const RhsScalar *blockB, Index rows, Index depth, Index cols, ResScalar alpha, Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0)
Definition GeneralBlockPanelKernel.h:1405
Definition GenericPacketMath.h:107
PacketBlock< Packet2d, 2 > rhstype
Definition MatrixProduct.h:72
PacketBlock< vectortype, 4 > type
Definition MatrixProduct.h:71
Packet2d vectortype
Definition MatrixProduct.h:70
Definition MatrixProduct.h:55
@ size
Definition MatrixProduct.h:62
@ rows
Definition MatrixProduct.h:63
@ vectorsize
Definition MatrixProduct.h:61
packet_traits< Scalar >::type vectortype
Definition MatrixProduct.h:56
vectortype rhstype
Definition MatrixProduct.h:58
PacketBlock< vectortype, 4 > type
Definition MatrixProduct.h:57
void operator()(double *blockA, const double *_lhs, Index lhsStride, Index cols, Index rows)
Definition MatrixProduct.h:392
void operator()(float *blockA, const float *_lhs, Index lhsStride, Index cols, Index rows)
Definition MatrixProduct.h:373
void operator()(std::complex< double > *blockA, const std::complex< double > *_lhs, Index lhsStride, Index cols, Index rows)
Definition MatrixProduct.h:354
void operator()(std::complex< float > *blockA, const std::complex< float > *_lhs, Index lhsStride, Index cols, Index rows)
Definition MatrixProduct.h:334
Definition SelfadjointMatrixMatrix.h:20
void operator()(double *blockB, const double *_rhs, Index rhsStride, Index rows, Index cols, Index k2)
Definition MatrixProduct.h:383
void operator()(float *blockB, const float *_rhs, Index rhsStride, Index rows, Index cols, Index k2)
Definition MatrixProduct.h:364
void operator()(std::complex< double > *blockB, const std::complex< double > *_rhs, Index rhsStride, Index rows, Index cols, Index k2)
Definition MatrixProduct.h:345
void operator()(std::complex< float > *blockB, const std::complex< float > *_rhs, Index rhsStride, Index rows, Index cols, Index k2)
Definition MatrixProduct.h:325
Definition SelfadjointMatrixMatrix.h:102
Definition ForwardDeclarations.h:17
Definition datatypes.h:12
std::ptrdiff_t j
Definition tut_arithmetic_redux_minmax.cpp:2
Definition PacketMath.h:47