10#ifndef EIGEN_GENERAL_BLOCK_PANEL_H
11#define EIGEN_GENERAL_BLOCK_PANEL_H
24template<
typename _LhsScalar,
typename _RhsScalar,
bool _ConjLhs=false,
bool _ConjRhs=false,
int Arch=Architecture::Target,
int _PacketSize=GEBPPacketFull>
34#if defined(EIGEN_DEFAULT_L1_CACHE_SIZE)
35#define EIGEN_SET_DEFAULT_L1_CACHE_SIZE(val) EIGEN_DEFAULT_L1_CACHE_SIZE
37#define EIGEN_SET_DEFAULT_L1_CACHE_SIZE(val) val
40#if defined(EIGEN_DEFAULT_L2_CACHE_SIZE)
41#define EIGEN_SET_DEFAULT_L2_CACHE_SIZE(val) EIGEN_DEFAULT_L2_CACHE_SIZE
43#define EIGEN_SET_DEFAULT_L2_CACHE_SIZE(val) val
46#if defined(EIGEN_DEFAULT_L3_CACHE_SIZE)
47#define EIGEN_SET_DEFAULT_L3_CACHE_SIZE(val) EIGEN_DEFAULT_L3_CACHE_SIZE
49#define EIGEN_SET_DEFAULT_L3_CACHE_SIZE(val) val
52#if EIGEN_ARCH_i386_OR_x86_64
66#undef EIGEN_SET_DEFAULT_L1_CACHE_SIZE
67#undef EIGEN_SET_DEFAULT_L2_CACHE_SIZE
68#undef EIGEN_SET_DEFAULT_L3_CACHE_SIZE
123template<
typename LhsScalar,
typename RhsScalar,
int KcFactor,
typename Index>
133 std::ptrdiff_t
l1,
l2,
l3;
135 #ifdef EIGEN_VECTORIZE_AVX512
147 typedef typename Traits::ResScalar ResScalar;
149 kdiv =
KcFactor * (Traits::mr *
sizeof(LhsScalar) + Traits::nr *
sizeof(RhsScalar)),
150 ksub = Traits::mr * Traits::nr *
sizeof(ResScalar),
192#ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
205 typedef typename Traits::ResScalar ResScalar;
208 k_div =
KcFactor * (Traits::mr *
sizeof(LhsScalar) + Traits::nr *
sizeof(RhsScalar)),
209 k_sub = Traits::mr * Traits::nr *
sizeof(ResScalar)
238 #ifdef EIGEN_DEBUG_SMALL_PRODUCT_BLOCKS
272 : (nc - Traits::nr * ((nc-(
n%nc))/(Traits::nr*(
n/nc+1))));
296 if (mc > Traits::mr) mc -= mc % Traits::mr;
297 else if (mc==0)
return;
299 : (mc - Traits::mr * ((mc-(
m%mc))/(Traits::mr*(
m/mc+1))));
304template <
typename Index>
307#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
338template<
typename LhsScalar,
typename RhsScalar,
int KcFactor,
typename Index>
346template<
typename LhsScalar,
typename RhsScalar,
typename Index>
352template <
typename RhsPacket,
typename RhsPacketx4,
int registers_taken>
360template <
typename Packet>
370template <
int N,
typename T1,
typename T2,
typename T3>
373template <
typename T1,
typename T2,
typename T3>
376template <
typename T1,
typename T2,
typename T3>
379#define PACKET_DECL_COND_PREFIX(prefix, name, packet_size) \
380 typedef typename packet_conditional<packet_size, \
381 typename packet_traits<name ## Scalar>::type, \
382 typename packet_traits<name ## Scalar>::half, \
383 typename unpacket_traits<typename packet_traits<name ## Scalar>::half>::half>::type \
384 prefix ## name ## Packet
386#define PACKET_DECL_COND(name, packet_size) \
387 typedef typename packet_conditional<packet_size, \
388 typename packet_traits<name ## Scalar>::type, \
389 typename packet_traits<name ## Scalar>::half, \
390 typename unpacket_traits<typename packet_traits<name ## Scalar>::half>::half>::type \
393#define PACKET_DECL_COND_SCALAR_PREFIX(prefix, packet_size) \
394 typedef typename packet_conditional<packet_size, \
395 typename packet_traits<Scalar>::type, \
396 typename packet_traits<Scalar>::half, \
397 typename unpacket_traits<typename packet_traits<Scalar>::half>::half>::type \
398 prefix ## ScalarPacket
400#define PACKET_DECL_COND_SCALAR(packet_size) \
401 typedef typename packet_conditional<packet_size, \
402 typename packet_traits<Scalar>::type, \
403 typename packet_traits<Scalar>::half, \
404 typename unpacket_traits<typename packet_traits<Scalar>::half>::half>::type \
417template<
typename _LhsScalar,
typename _RhsScalar,
bool _ConjLhs,
bool _ConjRhs,
int Arch,
int _PacketSize>
473 template<
typename RhsPacketType>
484 template<
typename RhsPacketType>
499 template<
typename LhsPacketType>
505 template<
typename LhsPacketType>
511 template<
typename LhsPacketType,
typename RhsPacketType,
typename AccPacketType,
typename LaneIdType>
519#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
527 template<
typename LhsPacketType,
typename AccPacketType,
typename LaneIdType>
538 template<
typename ResPacketHalf>
546template<
typename RealScalar,
bool _ConjLhs,
int Arch,
int _PacketSize>
568#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD) && !defined(EIGEN_VECTORIZE_ALTIVEC) && !defined(EIGEN_VECTORIZE_VSX)
570 mr = 3*LhsPacketSize,
575 LhsProgress = LhsPacketSize,
593 template<
typename RhsPacketType>
604 template<
typename RhsPacketType>
637 template<
typename LhsPacketType>
643 template <
typename LhsPacketType,
typename RhsPacketType,
typename AccPacketType,
typename LaneIdType>
649 template <
typename LhsPacketType,
typename RhsPacketType,
typename AccPacketType>
652#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
665 template<
typename LhsPacketType,
typename AccPacketType,
typename LaneIdType>
671 template <
typename ResPacketType,
typename AccPacketType>
681template<
typename Packet>
688template<
typename Packet>
701template<
typename Packet>
702const DoublePacket<Packet>&
709template<
typename Packet>
710DoublePacket<typename unpacket_traits<Packet>::half>
716 typedef std::complex<typename unpacket_traits<Packet>::type>
Cplx;
724template<
typename Scalar,
typename RealPacket>
732template<
typename Scalar,
typename RealPacket>
738 RealScalar r[4] = {numext::real(
b[0]), numext::real(
b[0]), numext::real(
b[1]), numext::real(
b[1])};
739 RealScalar i[4] = {numext::imag(
b[0]), numext::imag(
b[0]), numext::imag(
b[1]), numext::imag(
b[1])};
757template<
typename RealScalar,
bool _ConjLhs,
bool _ConjRhs,
int Arch,
int _PacketSize>
786 LhsProgress = ResPacketSize,
816 template<
typename RealPacketType>
825 loadRhs(
b,
dest.B_0);
826 loadRhs(
b + 1,
dest.B1);
827 loadRhs(
b + 2,
dest.B2);
828 loadRhs(
b + 3,
dest.B3);
838 template<
typename RealPacketType>
861 template<
typename LhsPacketType>
867 template<
typename LhsPacketType,
typename RhsPacketType,
typename ResPacketType,
typename TmpType,
typename LaneIdType>
876 template<
typename LaneIdType>
882 template<
typename LhsPacketType,
typename AccPacketType,
typename LaneIdType>
890 template<
typename RealPacketType,
typename ResPacketType>
895 if((!ConjLhs)&&(!ConjRhs))
900 else if((!ConjLhs)&&(ConjRhs))
905 else if((ConjLhs)&&(!ConjRhs))
910 else if((ConjLhs)&&(ConjRhs))
923template<
typename RealScalar,
bool _ConjRhs,
int Arch,
int _PacketSize>
938#undef PACKET_DECL_COND_SCALAR_PREFIX
939#undef PACKET_DECL_COND_PREFIX
940#undef PACKET_DECL_COND_SCALAR
941#undef PACKET_DECL_COND
957 LhsProgress = ResPacketSize,
973 template<
typename RhsPacketType>
984 template<
typename RhsPacketType>
1003 template<
typename LhsPacketType>
1009 template <
typename LhsPacketType,
typename RhsPacketType,
typename AccPacketType,
typename LaneIdType>
1015 template <
typename LhsPacketType,
typename RhsPacketType,
typename AccPacketType>
1018#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
1032 template<
typename LhsPacketType,
typename AccPacketType,
typename LaneIdType>
1038 template <
typename ResPacketType,
typename AccPacketType>
1056template<
typename LhsScalar,
typename RhsScalar,
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
1093 Vectorizable = Traits::Vectorizable,
1094 LhsProgress = Traits::LhsProgress,
1095 LhsProgressHalf = HalfTraits::LhsProgress,
1096 LhsProgressQuarter = QuarterTraits::LhsProgress,
1097 RhsProgress = Traits::RhsProgress,
1098 RhsProgressHalf = HalfTraits::RhsProgress,
1099 RhsProgressQuarter = QuarterTraits::RhsProgress,
1100 ResPacketSize = Traits::ResPacketSize
1109template<
typename LhsScalar,
typename RhsScalar,
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs,
1140template<
typename LhsScalar,
typename RhsScalar,
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
1176 blB += SwappedTraits::LhsProgress/4;
1185 res.scatterPacket(
i,
j2, R);
1189template<
int nr, Index LhsProgress, Index RhsProgress,
typename LhsScalar,
typename RhsScalar,
typename ResScalar,
typename AccPacket,
typename LhsPacket,
typename RhsPacket,
typename ResPacket,
typename GEBPTraits,
typename LinearMapper,
typename DataMapper>
1194 EIGEN_STRONG_INLINE void peeled_kc_onestep(
Index K,
const LhsScalar*
blA,
const RhsScalar*
blB,
GEBPTraits traits, LhsPacket *
A0,
RhsPacketx4 *
rhs_panel, RhsPacket *
T0, AccPacket *
C0, AccPacket *
C1, AccPacket *
C2, AccPacket *
C3)
1204 #if EIGEN_GNUC_AT_LEAST(6,0) && defined(EIGEN_VECTORIZE_SSE)
1211 const DataMapper&
res,
const LhsScalar* blockA,
const RhsScalar* blockB, ResScalar
alpha,
1247 LinearMapper
r0 =
res.getLinearMapper(
i,
j2 + 0);
1248 LinearMapper
r1 =
res.getLinearMapper(
i,
j2 + 1);
1249 LinearMapper
r2 =
res.getLinearMapper(
i,
j2 + 2);
1250 LinearMapper
r3 =
res.getLinearMapper(
i,
j2 + 3);
1268 internal::prefetch(
blB+(48+0));
1269 peeled_kc_onestep(0,
blA,
blB,
traits, &
A0, &
rhs_panel, &
T0, &
C0, &
C1, &
C2, &
C3);
1270 peeled_kc_onestep(1,
blA,
blB,
traits, &
A1, &
rhs_panel, &
T0, &
D0, &
D1, &
D2, &
D3);
1271 peeled_kc_onestep(2,
blA,
blB,
traits, &
A0, &
rhs_panel, &
T0, &
C0, &
C1, &
C2, &
C3);
1272 peeled_kc_onestep(3,
blA,
blB,
traits, &
A1, &
rhs_panel, &
T0, &
D0, &
D1, &
D2, &
D3);
1273 internal::prefetch(
blB+(48+16));
1274 peeled_kc_onestep(4,
blA,
blB,
traits, &
A0, &
rhs_panel, &
T0, &
C0, &
C1, &
C2, &
C3);
1275 peeled_kc_onestep(5,
blA,
blB,
traits, &
A1, &
rhs_panel, &
T0, &
D0, &
D1, &
D2, &
D3);
1276 peeled_kc_onestep(6,
blA,
blB,
traits, &
A0, &
rhs_panel, &
T0, &
C0, &
C1, &
C2, &
C3);
1277 peeled_kc_onestep(7,
blA,
blB,
traits, &
A1, &
rhs_panel, &
T0, &
D0, &
D1, &
D2, &
D3);
1279 blB +=
pk*4*RhsProgress;
1280 blA +=
pk*LhsProgress;
1294 peeled_kc_onestep(0,
blA,
blB,
traits, &
A0, &
rhs_panel, &
T0, &
C0, &
C1, &
C2, &
C3);
1295 blB += 4*RhsProgress;
1306 r0.storePacket(0,
R0);
1307 r1.storePacket(0,
R1);
1313 r2.storePacket(0,
R0);
1314 r3.storePacket(0,
R1);
1328 LinearMapper
r0 =
res.getLinearMapper(
i,
j2);
1339#define EIGEN_GEBGP_ONESTEP(K) \
1341 EIGEN_ASM_COMMENT("begin step of gebp micro kernel 1/half/quarterX1"); \
1342 EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
1344 traits.loadLhsUnaligned(&blA[(0+1*K)*LhsProgress], A0); \
1345 traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
1346 traits.madd(A0, B_0, C0, B_0, fix<0>); \
1347 EIGEN_ASM_COMMENT("end step of gebp micro kernel 1/half/quarterX1"); \
1359 blB +=
pk*RhsProgress;
1360 blA +=
pk*LhsProgress;
1373#undef EIGEN_GEBGP_ONESTEP
1378 r0.storePacket(0,
R0);
1384template<
int nr, Index LhsProgress, Index RhsProgress,
typename LhsScalar,
typename RhsScalar,
typename ResScalar,
typename AccPacket,
typename LhsPacket,
typename RhsPacket,
typename ResPacket,
typename GEBPTraits,
typename LinearMapper,
typename DataMapper>
1385struct lhs_process_fraction_of_packet :
lhs_process_one_packet<nr, LhsProgress, RhsProgress, LhsScalar, RhsScalar, ResScalar, AccPacket, LhsPacket, RhsPacket, ResPacket, GEBPTraits, LinearMapper, DataMapper>
1388EIGEN_STRONG_INLINE void peeled_kc_onestep(
Index K,
const LhsScalar*
blA,
const RhsScalar*
blB,
GEBPTraits traits, LhsPacket *
A0, RhsPacket *B_0, RhsPacket *B1, RhsPacket *B2, RhsPacket *B3, AccPacket *
C0, AccPacket *
C1, AccPacket *
C2, AccPacket *
C3)
1392 traits.loadLhsUnaligned(&
blA[(0+1*K)*(LhsProgress)], *
A0);
1393 traits.broadcastRhs(&
blB[(0+4*K)*RhsProgress], *B_0, *B1, *B2, *B3);
1402template<
typename LhsScalar,
typename RhsScalar,
typename Index,
typename DataMapper,
int mr,
int nr,
bool ConjugateLhs,
bool ConjugateRhs>
1404void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,ConjugateRhs>
1429 if(
mr>=3*Traits::LhsProgress)
1485 #if EIGEN_COMP_GNUC_STRICT && EIGEN_ARCH_ARM64 && defined(EIGEN_VECTORIZE_NEON) && !(EIGEN_GNUC_AT_LEAST(9,0))
1489 #define EIGEN_GEBP_3PX4_REGISTER_ALLOC_WORKAROUND __asm__ ("" : "+w,m" (A0), "+w,m" (A1), "+w,m" (A2));
1491 #define EIGEN_GEBP_3PX4_REGISTER_ALLOC_WORKAROUND
1493#define EIGEN_GEBP_ONESTEP(K) \
1495 EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX4"); \
1496 EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
1497 internal::prefetch(blA + (3 * K + 16) * LhsProgress); \
1498 if (EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) { \
1499 internal::prefetch(blB + (4 * K + 16) * RhsProgress); \
1501 traits.loadLhs(&blA[(0 + 3 * K) * LhsProgress], A0); \
1502 traits.loadLhs(&blA[(1 + 3 * K) * LhsProgress], A1); \
1503 traits.loadLhs(&blA[(2 + 3 * K) * LhsProgress], A2); \
1504 EIGEN_GEBP_3PX4_REGISTER_ALLOC_WORKAROUND \
1505 traits.loadRhs(blB + (0+4*K) * Traits::RhsProgress, rhs_panel); \
1506 traits.madd(A0, rhs_panel, C0, T0, fix<0>); \
1507 traits.madd(A1, rhs_panel, C4, T0, fix<0>); \
1508 traits.madd(A2, rhs_panel, C8, T0, fix<0>); \
1509 traits.updateRhs(blB + (1+4*K) * Traits::RhsProgress, rhs_panel); \
1510 traits.madd(A0, rhs_panel, C1, T0, fix<1>); \
1511 traits.madd(A1, rhs_panel, C5, T0, fix<1>); \
1512 traits.madd(A2, rhs_panel, C9, T0, fix<1>); \
1513 traits.updateRhs(blB + (2+4*K) * Traits::RhsProgress, rhs_panel); \
1514 traits.madd(A0, rhs_panel, C2, T0, fix<2>); \
1515 traits.madd(A1, rhs_panel, C6, T0, fix<2>); \
1516 traits.madd(A2, rhs_panel, C10, T0, fix<2>); \
1517 traits.updateRhs(blB + (3+4*K) * Traits::RhsProgress, rhs_panel); \
1518 traits.madd(A0, rhs_panel, C3, T0, fix<3>); \
1519 traits.madd(A1, rhs_panel, C7, T0, fix<3>); \
1520 traits.madd(A2, rhs_panel, C11, T0, fix<3>); \
1521 EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX4"); \
1535 blA +=
pk*3*Traits::LhsProgress;
1547 blA += 3*Traits::LhsProgress;
1550#undef EIGEN_GEBP_ONESTEP
1561 r0.storePacket(0 * Traits::ResPacketSize,
R0);
1562 r0.storePacket(1 * Traits::ResPacketSize,
R1);
1563 r0.storePacket(2 * Traits::ResPacketSize,
R2);
1571 r1.storePacket(0 * Traits::ResPacketSize,
R0);
1572 r1.storePacket(1 * Traits::ResPacketSize,
R1);
1573 r1.storePacket(2 * Traits::ResPacketSize,
R2);
1581 r2.storePacket(0 * Traits::ResPacketSize,
R0);
1582 r2.storePacket(1 * Traits::ResPacketSize,
R1);
1583 r2.storePacket(2 * Traits::ResPacketSize,
R2);
1591 r3.storePacket(0 * Traits::ResPacketSize,
R0);
1592 r3.storePacket(1 * Traits::ResPacketSize,
R1);
1593 r3.storePacket(2 * Traits::ResPacketSize,
R2);
1623#define EIGEN_GEBGP_ONESTEP(K) \
1625 EIGEN_ASM_COMMENT("begin step of gebp micro kernel 3pX1"); \
1626 EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
1627 traits.loadLhs(&blA[(0 + 3 * K) * LhsProgress], A0); \
1628 traits.loadLhs(&blA[(1 + 3 * K) * LhsProgress], A1); \
1629 traits.loadLhs(&blA[(2 + 3 * K) * LhsProgress], A2); \
1630 traits.loadRhs(&blB[(0 + K) * RhsProgress], B_0); \
1631 traits.madd(A0, B_0, C0, B_0, fix<0>); \
1632 traits.madd(A1, B_0, C4, B_0, fix<0>); \
1633 traits.madd(A2, B_0, C8, B_0, fix<0>); \
1634 EIGEN_ASM_COMMENT("end step of gebp micro kernel 3pX1"); \
1658 blA += 3*Traits::LhsProgress;
1660#undef EIGEN_GEBGP_ONESTEP
1670 r0.storePacket(0 * Traits::ResPacketSize,
R0);
1671 r0.storePacket(1 * Traits::ResPacketSize,
R1);
1672 r0.storePacket(2 * Traits::ResPacketSize,
R2);
1679 if(
mr>=2*Traits::LhsProgress)
1730 #if EIGEN_GNUC_AT_LEAST(6,0) && defined(EIGEN_VECTORIZE_SSE)
1731 #define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND __asm__ ("" : [a0] "+x,m" (A0),[a1] "+x,m" (A1));
1733 #define EIGEN_GEBP_2PX4_SPILLING_WORKAROUND
1735#define EIGEN_GEBGP_ONESTEP(K) \
1737 EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX4"); \
1738 traits.loadLhs(&blA[(0 + 2 * K) * LhsProgress], A0); \
1739 traits.loadLhs(&blA[(1 + 2 * K) * LhsProgress], A1); \
1740 traits.loadRhs(&blB[(0 + 4 * K) * RhsProgress], rhs_panel); \
1741 traits.madd(A0, rhs_panel, C0, T0, fix<0>); \
1742 traits.madd(A1, rhs_panel, C4, T0, fix<0>); \
1743 traits.madd(A0, rhs_panel, C1, T0, fix<1>); \
1744 traits.madd(A1, rhs_panel, C5, T0, fix<1>); \
1745 traits.madd(A0, rhs_panel, C2, T0, fix<2>); \
1746 traits.madd(A1, rhs_panel, C6, T0, fix<2>); \
1747 traits.madd(A0, rhs_panel, C3, T0, fix<3>); \
1748 traits.madd(A1, rhs_panel, C7, T0, fix<3>); \
1749 EIGEN_GEBP_2PX4_SPILLING_WORKAROUND \
1750 EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX4"); \
1765 blA +=
pk*(2*Traits::LhsProgress);
1776 blA += 2*Traits::LhsProgress;
1778#undef EIGEN_GEBGP_ONESTEP
1791 r0.storePacket(0 * Traits::ResPacketSize,
R0);
1792 r0.storePacket(1 * Traits::ResPacketSize,
R1);
1793 r1.storePacket(0 * Traits::ResPacketSize,
R2);
1794 r1.storePacket(1 * Traits::ResPacketSize,
R3);
1804 r2.storePacket(0 * Traits::ResPacketSize,
R0);
1805 r2.storePacket(1 * Traits::ResPacketSize,
R1);
1806 r3.storePacket(0 * Traits::ResPacketSize,
R2);
1807 r3.storePacket(1 * Traits::ResPacketSize,
R3);
1837#define EIGEN_GEBGP_ONESTEP(K) \
1839 EIGEN_ASM_COMMENT("begin step of gebp micro kernel 2pX1"); \
1840 EIGEN_ASM_COMMENT("Note: these asm comments work around bug 935!"); \
1841 traits.loadLhs(&blA[(0+2*K)*LhsProgress], A0); \
1842 traits.loadLhs(&blA[(1+2*K)*LhsProgress], A1); \
1843 traits.loadRhs(&blB[(0+K)*RhsProgress], B_0); \
1844 traits.madd(A0, B_0, C0, B1, fix<0>); \
1845 traits.madd(A1, B_0, C4, B_0, fix<0>); \
1846 EIGEN_ASM_COMMENT("end step of gebp micro kernel 2pX1"); \
1870 blA += 2*Traits::LhsProgress;
1872#undef EIGEN_GEBGP_ONESTEP
1880 r0.storePacket(0 * Traits::ResPacketSize,
R0);
1881 r0.storePacket(1 * Traits::ResPacketSize,
R1);
1887 if(
mr>=1*Traits::LhsProgress)
1889 lhs_process_one_packet<nr, LhsProgress, RhsProgress, LhsScalar, RhsScalar, ResScalar, AccPacket, LhsPacket, RhsPacket, ResPacket, Traits, LinearMapper, DataMapper> p;
1890 p(
res, blockA, blockB,
alpha,
peeled_mc2,
peeled_mc1,
strideA,
strideB,
offsetA,
offsetB,
prefetch_res_offset,
peeled_kc,
pk,
cols,
depth,
packet_cols4);
1895 lhs_process_fraction_of_packet<nr, LhsProgressHalf, RhsProgressHalf, LhsScalar, RhsScalar, ResScalar, AccPacketHalf, LhsPacketHalf, RhsPacketHalf, ResPacketHalf, HalfTraits, LinearMapper, DataMapper> p;
1896 p(
res, blockA, blockB,
alpha,
peeled_mc1,
peeled_mc_half,
strideA,
strideB,
offsetA,
offsetB,
prefetch_res_offset,
peeled_kc,
pk,
cols,
depth,
packet_cols4);
1899 if((LhsProgressQuarter < LhsProgressHalf) &&
mr>=LhsProgressQuarter)
1901 lhs_process_fraction_of_packet<nr, LhsProgressQuarter, RhsProgressQuarter, LhsScalar, RhsScalar, ResScalar, AccPacketQuarter, LhsPacketQuarter, RhsPacketQuarter, ResPacketQuarter, QuarterTraits, LinearMapper, DataMapper> p;
1902 p(
res, blockA, blockB,
alpha,
peeled_mc_half,
peeled_mc_quarter,
strideA,
strideB,
offsetA,
offsetB,
prefetch_res_offset,
peeled_kc,
pk,
cols,
depth,
packet_cols4);
1922 if ((SwappedTraits::LhsProgress % 4) == 0 &&
1923 (SwappedTraits::LhsProgress<=16) &&
1933 const Index spk = (std::max)(1,SwappedTraits::LhsProgress/4);
1943 straits.loadLhsUnaligned(
blB+0*SwappedTraits::LhsProgress,
A0);
1944 straits.loadLhsUnaligned(
blB+1*SwappedTraits::LhsProgress,
A1);
1951 straits.loadLhsUnaligned(
blB+2*SwappedTraits::LhsProgress,
A0);
1952 straits.loadLhsUnaligned(
blB+3*SwappedTraits::LhsProgress,
A1);
1958 blB += 4*SwappedTraits::LhsProgress;
1971 blB += SwappedTraits::LhsProgress;
1974 if(SwappedTraits::LhsProgress==8)
2000 res.scatterPacket(
i,
j2, R);
2002 else if (SwappedTraits::LhsProgress==16)
2016 res.scatterPacket(
i,
j2, R);
2033 C0 = cj.pmadd(
A0,B_0,
C0);
2038 C2 = cj.pmadd(
A0,B_0,
C2);
2065 C0 = cj.pmadd(
A0, B_0,
C0);
2088template<
typename Scalar,
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2095template<
typename Scalar,
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2104 HasHalf = (
int)HalfPacketSize < (
int)PacketSize,
2127 if(
Pack1>=3*PacketSize)
2147 if(
Pack1>=2*PacketSize)
2165 if(
Pack1>=1*PacketSize)
2193 count+=HalfPacketSize;
2210 count+=QuarterPacketSize;
2229 blockA[
count++] = cj(lhs(
i+
w, k));
2239 blockA[
count++] = cj(lhs(
i, k));
2244template<
typename Scalar,
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2251template<
typename Scalar,
typename Index,
typename DataMapper,
int Pack1,
int Pack2,
typename Packet,
bool Conjugate,
bool PanelMode>
2260 HasHalf = (
int)HalfPacketSize < (
int)PacketSize,
2273 int psize = PacketSize;
2284 if(pack>=
psize &&
psize >= QuarterPacketSize)
2291 if (
psize == PacketSize) {
2317 for(;
w<pack-3;
w+=4)
2320 b(cj(lhs(
i+
w+1, k))),
2321 c(cj(lhs(
i+
w+2, k))),
2322 d(cj(lhs(
i+
w+3, k)));
2326 blockA[
count++] = d;
2330 blockA[
count++] = cj(lhs(
i+
w, k));
2364 blockA[
count++] = cj(lhs(
i, k));
2376template<
typename Scalar,
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2385template<
typename Scalar,
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2386EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>
2456 if((PacketSize%4)==0)
2465 pstoreu(blockB+
count+0*PacketSize, cj.pconj(kernel.packet[0]));
2466 pstoreu(blockB+
count+1*PacketSize, cj.pconj(kernel.packet[1%PacketSize]));
2467 pstoreu(blockB+
count+2*PacketSize, cj.pconj(kernel.packet[2%PacketSize]));
2468 pstoreu(blockB+
count+3*PacketSize, cj.pconj(kernel.packet[3%PacketSize]));
2469 count+=4*PacketSize;
2500template<
typename Scalar,
typename Index,
typename DataMapper,
int nr,
bool Conjugate,
bool PanelMode>
2516 const bool HasHalf = (
int)HalfPacketSize < (
int)PacketSize;
2517 const bool HasQuarter = (
int)QuarterPacketSize < (
int)HalfPacketSize;
2564 if (PacketSize==4) {
2567 count += PacketSize;
2568 }
else if (
HasHalf && HalfPacketSize==4) {
2571 count += HalfPacketSize;
2572 }
else if (
HasQuarter && QuarterPacketSize==4) {
2575 count += QuarterPacketSize;
2595 blockB[
count] = cj(rhs(k,
j2));
2609 std::ptrdiff_t l1, l2, l3;
2618 std::ptrdiff_t l1, l2, l3;
2628 std::ptrdiff_t l1, l2, l3;
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
Definition PacketMath.h:22
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
Definition PacketMath.h:27
Matrix3f m
Definition AngleAxis_mimic_euler.cpp:1
ArrayXXi a
Definition Array_initializer_list_23_cxx11.cpp:1
int n
Definition BiCGSTAB_simple.cpp:1
int i
Definition BiCGSTAB_step_by_step.cpp:9
internal::enable_if< internal::valid_indexed_view_overload< RowIndices, ColIndices >::value &&internal::traits< typenameEIGEN_INDEXED_VIEW_METHOD_TYPE< RowIndices, ColIndices >::type >::ReturnAsIndexedView, typenameEIGEN_INDEXED_VIEW_METHOD_TYPE< RowIndices, ColIndices >::type >::type operator()(const RowIndices &rowIndices, const ColIndices &colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST
Definition IndexedViewMethods.h:73
#define EIGEN_ASM_COMMENT(X)
Definition Macros.h:1082
#define EIGEN_COMP_MSVC
Definition Macros.h:114
#define EIGEN_PLAIN_ENUM_MIN(a, b)
Definition Macros.h:1288
#define eigen_internal_assert(x)
Definition Macros.h:1043
#define EIGEN_UNUSED_VARIABLE(var)
Definition Macros.h:1076
#define EIGEN_DONT_INLINE
Definition Macros.h:940
#define eigen_assert(x)
Definition Macros.h:1037
#define EIGEN_STRONG_INLINE
Definition Macros.h:917
RowVector3d w
Definition Matrix_resize_int.cpp:3
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
Definition PartialRedux_count.cpp:3
float * p
Definition Tutorial_Map_using.cpp:9
int rows
Definition Tutorial_commainit_02.cpp:1
int cols
Definition Tutorial_commainit_02.cpp:1
Scalar Scalar * c
Definition benchVecAdd.cpp:17
Scalar * b
Definition benchVecAdd.cpp:17
Scalar Scalar int size
Definition benchVecAdd.cpp:17
SCALAR Scalar
Definition bench_gemm.cpp:46
Matrix< Scalar, Dynamic, Dynamic > C
Definition bench_gemm.cpp:50
NumTraits< Scalar >::Real RealScalar
Definition bench_gemm.cpp:47
Matrix< SCALARA, Dynamic, Dynamic, opt_A > A
Definition bench_gemm.cpp:48
Matrix< SCALARB, Dynamic, Dynamic, opt_B > B
Definition bench_gemm.cpp:49
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M
Definition benchmark-blocking-sizes.cpp:22
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K
Definition benchmark-blocking-sizes.cpp:21
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
Definition benchmark-blocking-sizes.cpp:20
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N
Definition benchmark-blocking-sizes.cpp:23
mp::number< mp::cpp_dec_float< 100 >, mp::et_on > Real
Definition boostmultiprec.cpp:78
Definition ForwardDeclarations.h:87
The matrix class, also used for vectors and row-vectors.
Definition Matrix.h:180
Definition TensorRef.h:81
LhsPacket LhsPacket4Packing
Definition GeneralBlockPanelKernel.h:964
PACKET_DECL_COND_PREFIX(_, Res, _PacketSize)
EIGEN_STRONG_INLINE void loadLhs(const LhsScalar *a, LhsPacket &dest) const
Definition GeneralBlockPanelKernel.h:993
PACKET_DECL_COND_SCALAR_PREFIX(_, _PacketSize)
PACKET_DECL_COND_PREFIX(_, Rhs, _PacketSize)
QuadPacket< RhsPacket > RhsPacketx4
Definition GeneralBlockPanelKernel.h:965
EIGEN_STRONG_INLINE void madd_impl(const LhsPacketType &a, const RhsPacketType &b, AccPacketType &c, RhsPacketType &tmp, const true_type &) const
Definition GeneralBlockPanelKernel.h:1016
EIGEN_STRONG_INLINE void madd(const LhsPacketType &a, const RhsPacketx4 &b, AccPacketType &c, RhsPacket &tmp, const LaneIdType &lane) const
Definition GeneralBlockPanelKernel.h:1033
PACKET_DECL_COND_PREFIX(_, Real, _PacketSize)
EIGEN_STRONG_INLINE void madd(const LhsPacketType &a, const RhsPacketType &b, AccPacketType &c, RhsPacketType &tmp, const LaneIdType &) const
Definition GeneralBlockPanelKernel.h:1010
EIGEN_STRONG_INLINE void acc(const AccPacketType &c, const ResPacketType &alpha, ResPacketType &r) const
Definition GeneralBlockPanelKernel.h:1039
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar *b, RhsPacketType &dest) const
Definition GeneralBlockPanelKernel.h:974
conditional< Vectorizable, _RhsPacket, RhsScalar >::type RhsPacket
Definition GeneralBlockPanelKernel.h:962
Scalar ResScalar
Definition GeneralBlockPanelKernel.h:930
PACKET_DECL_COND_PREFIX(_, Lhs, _PacketSize)
EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar *a, LhsPacketType &dest) const
Definition GeneralBlockPanelKernel.h:1004
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar *, RhsPacketx4 &) const
Definition GeneralBlockPanelKernel.h:990
ResPacket AccPacket
Definition GeneralBlockPanelKernel.h:966
RealScalar LhsScalar
Definition GeneralBlockPanelKernel.h:928
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar *b, RhsPacketType &dest) const
Definition GeneralBlockPanelKernel.h:985
EIGEN_STRONG_INLINE void initAcc(AccPacket &p)
Definition GeneralBlockPanelKernel.h:968
EIGEN_STRONG_INLINE void madd_impl(const LhsScalar &a, const RhsScalar &b, ResScalar &c, RhsScalar &, const false_type &) const
Definition GeneralBlockPanelKernel.h:1027
conditional< Vectorizable, _ResPacket, ResScalar >::type ResPacket
Definition GeneralBlockPanelKernel.h:963
std::complex< RealScalar > Scalar
Definition GeneralBlockPanelKernel.h:927
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar *b, RhsPacket &dest) const
Definition GeneralBlockPanelKernel.h:998
conditional< Vectorizable, _LhsPacket, LhsScalar >::type LhsPacket
Definition GeneralBlockPanelKernel.h:961
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar *b, RhsPacketx4 &dest) const
Definition GeneralBlockPanelKernel.h:979
Scalar RhsScalar
Definition GeneralBlockPanelKernel.h:929
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar *b, RhsPacketType &dest) const
Definition GeneralBlockPanelKernel.h:605
LhsPacket LhsPacket4Packing
Definition GeneralBlockPanelKernel.h:582
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar *b, RhsPacket &dest) const
Definition GeneralBlockPanelKernel.h:613
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar *, RhsPacketx4 &) const
Definition GeneralBlockPanelKernel.h:610
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar *b, RhsPacketType &dest) const
Definition GeneralBlockPanelKernel.h:594
QuadPacket< RhsPacket > RhsPacketx4
Definition GeneralBlockPanelKernel.h:584
EIGEN_STRONG_INLINE void madd_impl(const LhsScalar &a, const RhsScalar &b, ResScalar &c, RhsScalar &, const false_type &) const
Definition GeneralBlockPanelKernel.h:660
std::complex< RealScalar > LhsScalar
Definition GeneralBlockPanelKernel.h:550
EIGEN_STRONG_INLINE void madd(const LhsPacketType &a, const RhsPacketx4 &b, AccPacketType &c, RhsPacket &tmp, const LaneIdType &lane) const
Definition GeneralBlockPanelKernel.h:666
conditional< Vectorizable, _ResPacket, ResScalar >::type ResPacket
Definition GeneralBlockPanelKernel.h:581
conditional< Vectorizable, _LhsPacket, LhsScalar >::type LhsPacket
Definition GeneralBlockPanelKernel.h:579
ScalarBinaryOpTraits< LhsScalar, RhsScalar >::ReturnType ResScalar
Definition GeneralBlockPanelKernel.h:552
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar *b, RhsPacketx4 &dest) const
Definition GeneralBlockPanelKernel.h:599
EIGEN_STRONG_INLINE void loadRhsQuad_impl(const RhsScalar *b, RhsPacket &dest, const false_type &) const
Definition GeneralBlockPanelKernel.h:626
RealScalar RhsScalar
Definition GeneralBlockPanelKernel.h:551
PACKET_DECL_COND_PREFIX(_, Res, _PacketSize)
EIGEN_STRONG_INLINE void madd(const LhsPacketType &a, const RhsPacketType &b, AccPacketType &c, RhsPacketType &tmp, const LaneIdType &) const
Definition GeneralBlockPanelKernel.h:644
EIGEN_STRONG_INLINE void initAcc(AccPacket &p)
Definition GeneralBlockPanelKernel.h:588
EIGEN_STRONG_INLINE void loadRhsQuad_impl(const RhsScalar *b, RhsPacket &dest, const true_type &) const
Definition GeneralBlockPanelKernel.h:618
EIGEN_STRONG_INLINE void loadLhs(const LhsScalar *a, LhsPacket &dest) const
Definition GeneralBlockPanelKernel.h:632
PACKET_DECL_COND_PREFIX(_, Lhs, _PacketSize)
EIGEN_STRONG_INLINE void madd_impl(const LhsPacketType &a, const RhsPacketType &b, AccPacketType &c, RhsPacketType &tmp, const true_type &) const
Definition GeneralBlockPanelKernel.h:650
EIGEN_STRONG_INLINE void acc(const AccPacketType &c, const ResPacketType &alpha, ResPacketType &r) const
Definition GeneralBlockPanelKernel.h:672
EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar *a, LhsPacketType &dest) const
Definition GeneralBlockPanelKernel.h:638
PACKET_DECL_COND_PREFIX(_, Rhs, _PacketSize)
ResPacket AccPacket
Definition GeneralBlockPanelKernel.h:586
conditional< Vectorizable, _RhsPacket, RhsScalar >::type RhsPacket
Definition GeneralBlockPanelKernel.h:580
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar *b, ResPacket &dest) const
Definition GeneralBlockPanelKernel.h:846
conditional< Vectorizable, ScalarPacket, Scalar >::type LhsPacket4Packing
Definition GeneralBlockPanelKernel.h:792
PACKET_DECL_COND_SCALAR(_PacketSize)
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar *b, DoublePacketType &dest) const
Definition GeneralBlockPanelKernel.h:850
PACKET_DECL_COND(Real, _PacketSize)
PACKET_DECL_COND_PREFIX(_, Lhs, _PacketSize)
conditional< Vectorizable, ScalarPacket, Scalar >::type ResPacket
Definition GeneralBlockPanelKernel.h:795
EIGEN_STRONG_INLINE enable_if<!is_same< RhsPacketType, RhsPacketx4 >::value >::type madd(const LhsPacketType &a, const RhsPacketType &b, DoublePacket< ResPacketType > &c, TmpType &, const LaneIdType &) const
Definition GeneralBlockPanelKernel.h:870
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar *, RhsPacketx4 &) const
Definition GeneralBlockPanelKernel.h:844
EIGEN_STRONG_INLINE void initAcc(Scalar &p)
Definition GeneralBlockPanelKernel.h:801
PACKET_DECL_COND_PREFIX(_, Rhs, _PacketSize)
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar *b, ScalarPacket &dest) const
Definition GeneralBlockPanelKernel.h:810
EIGEN_STRONG_INLINE void acc(const DoublePacket< RealPacketType > &c, const ResPacketType &alpha, ResPacketType &r) const
Definition GeneralBlockPanelKernel.h:891
std::complex< RealScalar > ResScalar
Definition GeneralBlockPanelKernel.h:764
conditional< Vectorizable, DoublePacketType, Scalar >::type RhsPacket
Definition GeneralBlockPanelKernel.h:794
EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar *a, LhsPacketType &dest) const
Definition GeneralBlockPanelKernel.h:862
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar *b, RhsPacketx4 &dest) const
Definition GeneralBlockPanelKernel.h:823
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar *b, DoublePacket< RealPacketType > &dest) const
Definition GeneralBlockPanelKernel.h:839
conditional< Vectorizable, DoublePacketType, Scalar >::type AccPacket
Definition GeneralBlockPanelKernel.h:796
std::complex< RealScalar > LhsScalar
Definition GeneralBlockPanelKernel.h:762
EIGEN_STRONG_INLINE void madd(const LhsPacketType &a, const RhsPacketx4 &b, AccPacketType &c, RhsPacket &tmp, const LaneIdType &lane) const
Definition GeneralBlockPanelKernel.h:883
PACKET_DECL_COND_PREFIX(_, Res, _PacketSize)
EIGEN_STRONG_INLINE void loadLhs(const LhsScalar *a, LhsPacket &dest) const
Definition GeneralBlockPanelKernel.h:856
std::complex< RealScalar > RhsScalar
Definition GeneralBlockPanelKernel.h:763
std::complex< RealScalar > Scalar
Definition GeneralBlockPanelKernel.h:761
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar *b, ScalarPacket &dest) const
Definition GeneralBlockPanelKernel.h:832
EIGEN_STRONG_INLINE void madd(const LhsPacket &a, const RhsPacket &b, ResPacket &c, RhsPacket &, const LaneIdType &) const
Definition GeneralBlockPanelKernel.h:877
conj_helper< LhsScalar, RhsScalar, ConjLhs, ConjRhs > cj
Definition GeneralBlockPanelKernel.h:920
EIGEN_STRONG_INLINE void acc(const Scalar &c, const Scalar &alpha, Scalar &r) const
Definition GeneralBlockPanelKernel.h:888
conditional< Vectorizable, RealPacket, Scalar >::type LhsPacket
Definition GeneralBlockPanelKernel.h:793
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar *b, DoublePacket< RealPacketType > &dest) const
Definition GeneralBlockPanelKernel.h:817
EIGEN_STRONG_INLINE void initAcc(DoublePacketType &p)
Definition GeneralBlockPanelKernel.h:803
QuadPacket< RhsPacket > RhsPacketx4
Definition GeneralBlockPanelKernel.h:799
DoublePacket< RealPacket > DoublePacketType
Definition GeneralBlockPanelKernel.h:790
Definition GeneralBlockPanelKernel.h:419
PACKET_DECL_COND_PREFIX(_, Lhs, _PacketSize)
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar *b, RhsPacket &dest) const
Definition GeneralBlockPanelKernel.h:494
_RhsScalar RhsScalar
Definition GeneralBlockPanelKernel.h:422
conditional< Vectorizable, _RhsPacket, RhsScalar >::type RhsPacket
Definition GeneralBlockPanelKernel.h:461
ResPacket AccPacket
Definition GeneralBlockPanelKernel.h:466
_LhsScalar LhsScalar
Definition GeneralBlockPanelKernel.h:421
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar *b, RhsPacketType &dest) const
Definition GeneralBlockPanelKernel.h:474
EIGEN_STRONG_INLINE void madd(const LhsPacketType &a, const RhsPacketType &b, AccPacketType &c, RhsPacketType &tmp, const LaneIdType &) const
Definition GeneralBlockPanelKernel.h:512
conditional< Vectorizable, _LhsPacket, LhsScalar >::type LhsPacket
Definition GeneralBlockPanelKernel.h:460
EIGEN_STRONG_INLINE void madd(const LhsPacketType &a, const RhsPacketx4 &b, AccPacketType &c, RhsPacket &tmp, const LaneIdType &lane) const
Definition GeneralBlockPanelKernel.h:528
EIGEN_STRONG_INLINE void acc(const ResPacketHalf &c, const ResPacketHalf &alpha, ResPacketHalf &r) const
Definition GeneralBlockPanelKernel.h:539
EIGEN_STRONG_INLINE void acc(const AccPacket &c, const ResPacket &alpha, ResPacket &r) const
Definition GeneralBlockPanelKernel.h:533
EIGEN_STRONG_INLINE void loadLhs(const LhsScalar *a, LhsPacketType &dest) const
Definition GeneralBlockPanelKernel.h:500
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar *b, RhsPacketType &dest) const
Definition GeneralBlockPanelKernel.h:485
QuadPacket< RhsPacket > RhsPacketx4
Definition GeneralBlockPanelKernel.h:465
ScalarBinaryOpTraits< LhsScalar, RhsScalar >::ReturnType ResScalar
Definition GeneralBlockPanelKernel.h:423
LhsPacket LhsPacket4Packing
Definition GeneralBlockPanelKernel.h:463
PACKET_DECL_COND_PREFIX(_, Res, _PacketSize)
PACKET_DECL_COND_PREFIX(_, Rhs, _PacketSize)
EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar *a, LhsPacketType &dest) const
Definition GeneralBlockPanelKernel.h:506
@ ConjRhs
Definition GeneralBlockPanelKernel.h:431
@ nr
Definition GeneralBlockPanelKernel.h:440
@ mr
Definition GeneralBlockPanelKernel.h:452
@ LhsProgress
Definition GeneralBlockPanelKernel.h:455
@ NumberOfRegisters
Definition GeneralBlockPanelKernel.h:437
@ LhsPacketSize
Definition GeneralBlockPanelKernel.h:433
@ RhsProgress
Definition GeneralBlockPanelKernel.h:456
@ ResPacketSize
Definition GeneralBlockPanelKernel.h:435
@ Vectorizable
Definition GeneralBlockPanelKernel.h:432
@ default_mr
Definition GeneralBlockPanelKernel.h:443
@ ConjLhs
Definition GeneralBlockPanelKernel.h:430
@ RhsPacketSize
Definition GeneralBlockPanelKernel.h:434
conditional< Vectorizable, _ResPacket, ResScalar >::type ResPacket
Definition GeneralBlockPanelKernel.h:462
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar *, RhsPacketx4 &) const
Definition GeneralBlockPanelKernel.h:490
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar *b, RhsPacketx4 &dest) const
Definition GeneralBlockPanelKernel.h:479
EIGEN_STRONG_INLINE void initAcc(AccPacket &p)
Definition GeneralBlockPanelKernel.h:468
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy y set format x g set format y g set format x2 g set format y2 g set format z g set angles radians set nogrid set key title set key left top Right noreverse box linetype linewidth samplen spacing width set nolabel set noarrow set nologscale set logscale x set set pointsize set encoding default set nopolar set noparametric set set set set surface set nocontour set clabel set mapping cartesian set nohidden3d set cntrparam order set cntrparam linear set cntrparam levels auto set cntrparam points set size set set xzeroaxis lt lw set x2zeroaxis lt lw set yzeroaxis lt lw set y2zeroaxis lt lw set tics in set ticslevel set tics set mxtics default set mytics default set mx2tics default set my2tics default set xtics border mirror norotate autofreq set ytics border mirror norotate autofreq set ztics border nomirror norotate autofreq set nox2tics set noy2tics set timestamp bottom norotate offset
Definition gnuplot_common_settings.hh:64
@ ColMajor
Definition Constants.h:319
@ RowMajor
Definition Constants.h:321
else
Definition cholesky.cpp:66
RealScalar alpha
Definition level1_cplx_impl.h:147
const std::ptrdiff_t defaultL2CacheSize
Definition GeneralBlockPanelKernel.h:62
EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf &a)
Definition Complex.h:167
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
Definition GenericPacketMath.h:215
EIGEN_DEVICE_FUNC void pbroadcast4(const typename unpacket_traits< Packet >::type *a, Packet &a0, Packet &a1, Packet &a2, Packet &a3)
Definition GenericPacketMath.h:653
EIGEN_STRONG_INLINE void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
Definition Complex.h:224
void queryCacheSizes(int &l1, int &l2, int &l3)
Definition Memory.h:1106
void evaluateProductBlockingSizesHeuristic(Index &k, Index &m, Index &n, Index num_threads=1)
Definition GeneralBlockPanelKernel.h:124
EIGEN_DEVICE_FUNC void prefetch(const Scalar *addr)
Definition GenericPacketMath.h:719
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition PacketMath.h:827
EIGEN_DEVICE_FUNC Packet pmul(const Packet &a, const Packet &b)
Definition GenericPacketMath.h:237
const std::ptrdiff_t defaultL3CacheSize
Definition GeneralBlockPanelKernel.h:63
EIGEN_STRONG_INLINE Packet1cd pcplxflip(const Packet1cd &x)
Definition Complex.h:620
GEBPPacketSizeType
Definition GeneralBlockPanelKernel.h:18
@ GEBPPacketHalf
Definition GeneralBlockPanelKernel.h:20
@ GEBPPacketQuarter
Definition GeneralBlockPanelKernel.h:21
@ GEBPPacketFull
Definition GeneralBlockPanelKernel.h:19
void computeProductBlockingSizes(Index &k, Index &m, Index &n, Index num_threads=1)
Computes the blocking parameters for a m x k times k x n matrix product.
Definition GeneralBlockPanelKernel.h:339
void manage_caching_sizes(Action action, std::ptrdiff_t *l1, std::ptrdiff_t *l2, std::ptrdiff_t *l3)
Definition GeneralBlockPanelKernel.h:86
EIGEN_DEVICE_FUNC void pstore(Scalar *to, const Packet &from)
Definition GenericPacketMath.h:696
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4c predux_half_dowto4(const Packet8c &a)
Definition PacketMath.h:2478
@ Lhs
Definition TensorContractionMapper.h:19
@ Rhs
Definition TensorContractionMapper.h:18
bool useSpecificBlockingSizes(Index &k, Index &m, Index &n)
Definition GeneralBlockPanelKernel.h:305
EIGEN_DEVICE_FUNC void pstoreu(Scalar *to, const Packet &from)
Definition GenericPacketMath.h:700
std::ptrdiff_t manage_caching_sizes_helper(std::ptrdiff_t a, std::ptrdiff_t b)
Definition GeneralBlockPanelKernel.h:29
const std::ptrdiff_t defaultL1CacheSize
Definition GeneralBlockPanelKernel.h:61
EIGEN_DEVICE_FUNC Packet psub(const Packet &a, const Packet &b)
Definition GenericPacketMath.h:222
void loadQuadToDoublePacket(const Scalar *b, DoublePacket< RealPacket > &dest, typename enable_if< unpacket_traits< RealPacket >::size<=8 >::type *=0)
Definition GeneralBlockPanelKernel.h:725
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T maxi(const T &x, const T &y)
Definition MathFunctions.h:1091
EIGEN_DEVICE_FUNC T div_ceil(const T &a, const T &b)
Definition Meta.h:779
Namespace containing all symbols from the Eigen library.
Definition bench_norm.cpp:85
std::ptrdiff_t l1CacheSize()
Definition GeneralBlockPanelKernel.h:2607
std::ptrdiff_t l2CacheSize()
Definition GeneralBlockPanelKernel.h:2616
Action
Definition Constants.h:504
@ GetAction
Definition Constants.h:504
@ SetAction
Definition Constants.h:504
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition Meta.h:74
std::ptrdiff_t l3CacheSize()
Definition GeneralBlockPanelKernel.h:2626
void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2, std::ptrdiff_t l3)
Definition GeneralBlockPanelKernel.h:2638
Definition BandTriangularSolver.h:13
#define EIGEN_SET_DEFAULT_L1_CACHE_SIZE(val)
Definition GeneralBlockPanelKernel.h:37
#define EIGEN_SET_DEFAULT_L3_CACHE_SIZE(val)
Definition GeneralBlockPanelKernel.h:49
#define EIGEN_SET_DEFAULT_L2_CACHE_SIZE(val)
Definition GeneralBlockPanelKernel.h:43
#define EIGEN_GEBGP_ONESTEP(K)
#define EIGEN_GEBP_ONESTEP(K)
uint8_t count
Definition ref_serial.h:256
Determines whether the given binary operation of two numeric types is allowed and what the scalar ret...
Definition XprHelper.h:806
Definition GeneralBlockPanelKernel.h:71
std::ptrdiff_t m_l1
Definition GeneralBlockPanelKernel.h:80
CacheSizes()
Definition GeneralBlockPanelKernel.h:72
std::ptrdiff_t m_l2
Definition GeneralBlockPanelKernel.h:81
std::ptrdiff_t m_l3
Definition GeneralBlockPanelKernel.h:82
Definition GeneralBlockPanelKernel.h:683
Packet second
Definition GeneralBlockPanelKernel.h:685
Packet first
Definition GeneralBlockPanelKernel.h:684
Definition GenericPacketMath.h:1014
Definition GeneralBlockPanelKernel.h:362
const Packet & get(const FixedInt< 0 > &) const
Definition GeneralBlockPanelKernel.h:364
const Packet & get(const FixedInt< 1 > &) const
Definition GeneralBlockPanelKernel.h:365
Packet B2
Definition GeneralBlockPanelKernel.h:363
const Packet & get(const FixedInt< 2 > &) const
Definition GeneralBlockPanelKernel.h:366
Packet B_0
Definition GeneralBlockPanelKernel.h:363
Packet B1
Definition GeneralBlockPanelKernel.h:363
const Packet & get(const FixedInt< 3 > &) const
Definition GeneralBlockPanelKernel.h:367
Packet B3
Definition GeneralBlockPanelKernel.h:363
Definition GeneralBlockPanelKernel.h:353
RhsPacket ::type type
Definition GeneralBlockPanelKernel.h:357
typedef RhsPacketx4
Definition GeneralBlockPanelKernel.h:357
Definition GeneralBlockPanelKernel.h:1058
DataMapper::LinearMapper LinearMapper
Definition GeneralBlockPanelKernel.h:1090
Traits::RhsPacket RhsPacket
Definition GeneralBlockPanelKernel.h:1065
SwappedTraits::ResPacket SResPacket
Definition GeneralBlockPanelKernel.h:1077
gebp_traits< RhsScalar, LhsScalar, ConjugateRhs, ConjugateLhs, Architecture::Target > SwappedTraits
Definition GeneralBlockPanelKernel.h:1072
QuarterTraits::ResPacket ResPacketQuarter
Definition GeneralBlockPanelKernel.h:1087
SwappedTraits::AccPacket SAccPacket
Definition GeneralBlockPanelKernel.h:1078
QuarterTraits::RhsPacket RhsPacketQuarter
Definition GeneralBlockPanelKernel.h:1086
HalfTraits::ResPacket ResPacketHalf
Definition GeneralBlockPanelKernel.h:1082
HalfTraits::RhsPacket RhsPacketHalf
Definition GeneralBlockPanelKernel.h:1081
Traits::RhsPacketx4 RhsPacketx4
Definition GeneralBlockPanelKernel.h:1068
SwappedTraits::RhsPacket SRhsPacket
Definition GeneralBlockPanelKernel.h:1076
QuarterTraits::LhsPacket LhsPacketQuarter
Definition GeneralBlockPanelKernel.h:1085
HalfTraits::LhsPacket LhsPacketHalf
Definition GeneralBlockPanelKernel.h:1080
QuarterTraits::AccPacket AccPacketQuarter
Definition GeneralBlockPanelKernel.h:1088
SwappedTraits::ResScalar SResScalar
Definition GeneralBlockPanelKernel.h:1074
RhsPanelHelper< RhsPacket, RhsPacketx4, 15 >::type RhsPanel15
Definition GeneralBlockPanelKernel.h:1070
gebp_traits< LhsScalar, RhsScalar, ConjugateLhs, ConjugateRhs, Architecture::Target, GEBPPacketHalf > HalfTraits
Definition GeneralBlockPanelKernel.h:1060
Traits::LhsPacket LhsPacket
Definition GeneralBlockPanelKernel.h:1064
Traits::ResScalar ResScalar
Definition GeneralBlockPanelKernel.h:1063
SwappedTraits::LhsPacket SLhsPacket
Definition GeneralBlockPanelKernel.h:1075
Traits::ResPacket ResPacket
Definition GeneralBlockPanelKernel.h:1066
Traits::AccPacket AccPacket
Definition GeneralBlockPanelKernel.h:1067
gebp_traits< LhsScalar, RhsScalar, ConjugateLhs, ConjugateRhs, Architecture::Target > Traits
Definition GeneralBlockPanelKernel.h:1059
HalfTraits::AccPacket AccPacketHalf
Definition GeneralBlockPanelKernel.h:1083
gebp_traits< LhsScalar, RhsScalar, ConjugateLhs, ConjugateRhs, Architecture::Target, GEBPPacketQuarter > QuarterTraits
Definition GeneralBlockPanelKernel.h:1061
DataMapper::LinearMapper LinearMapper
Definition GeneralBlockPanelKernel.h:2247
DataMapper::LinearMapper LinearMapper
Definition GeneralBlockPanelKernel.h:2091
DataMapper::LinearMapper LinearMapper
Definition GeneralBlockPanelKernel.h:2380
packet_traits< Scalar >::type Packet
Definition GeneralBlockPanelKernel.h:2379
DataMapper::LinearMapper LinearMapper
Definition GeneralBlockPanelKernel.h:2506
EIGEN_DONT_INLINE void operator()(Scalar *blockB, const DataMapper &rhs, Index depth, Index cols, Index stride=0, Index offset=0)
Definition GeneralBlockPanelKernel.h:2510
unpacket_traits< typenameunpacket_traits< Packet >::half >::half QuarterPacket
Definition GeneralBlockPanelKernel.h:2505
unpacket_traits< Packet >::half HalfPacket
Definition GeneralBlockPanelKernel.h:2504
packet_traits< Scalar >::type Packet
Definition GeneralBlockPanelKernel.h:2503
SwappedTraits::LhsPacket SLhsPacket
Definition GeneralBlockPanelKernel.h:1146
gebp_traits< RhsScalar, LhsScalar, ConjugateRhs, ConjugateLhs, Architecture::Target > SwappedTraits
Definition GeneralBlockPanelKernel.h:1143
SwappedTraits::ResPacket SResPacket
Definition GeneralBlockPanelKernel.h:1148
EIGEN_STRONG_INLINE void operator()(const DataMapper &res, SwappedTraits &straits, const LhsScalar *blA, const RhsScalar *blB, Index depth, const Index endk, Index i, Index j2, ResScalar alpha, SAccPacket &C0)
Definition GeneralBlockPanelKernel.h:1151
SwappedTraits::AccPacket SAccPacket
Definition GeneralBlockPanelKernel.h:1149
gebp_traits< LhsScalar, RhsScalar, ConjugateLhs, ConjugateRhs, Architecture::Target > Traits
Definition GeneralBlockPanelKernel.h:1142
SwappedTraits::RhsPacket SRhsPacket
Definition GeneralBlockPanelKernel.h:1147
Traits::ResScalar ResScalar
Definition GeneralBlockPanelKernel.h:1145
Definition GeneralBlockPanelKernel.h:1112
SwappedTraits::AccPacket SAccPacket
Definition GeneralBlockPanelKernel.h:1120
gebp_traits< LhsScalar, RhsScalar, ConjugateLhs, ConjugateRhs, Architecture::Target > Traits
Definition GeneralBlockPanelKernel.h:1113
SwappedTraits::ResPacket SResPacket
Definition GeneralBlockPanelKernel.h:1119
Traits::ResScalar ResScalar
Definition GeneralBlockPanelKernel.h:1116
SwappedTraits::LhsPacket SLhsPacket
Definition GeneralBlockPanelKernel.h:1117
gebp_traits< RhsScalar, LhsScalar, ConjugateRhs, ConjugateLhs, Architecture::Target > SwappedTraits
Definition GeneralBlockPanelKernel.h:1114
EIGEN_STRONG_INLINE void operator()(const DataMapper &res, SwappedTraits &straits, const LhsScalar *blA, const RhsScalar *blB, Index depth, const Index endk, Index i, Index j2, ResScalar alpha, SAccPacket &C0)
Definition GeneralBlockPanelKernel.h:1122
SwappedTraits::RhsPacket SRhsPacket
Definition GeneralBlockPanelKernel.h:1118
Definition GeneralBlockPanelKernel.h:1386
EIGEN_STRONG_INLINE void peeled_kc_onestep(Index K, const LhsScalar *blA, const RhsScalar *blB, GEBPTraits traits, LhsPacket *A0, RhsPacket *B_0, RhsPacket *B1, RhsPacket *B2, RhsPacket *B3, AccPacket *C0, AccPacket *C1, AccPacket *C2, AccPacket *C3)
Definition GeneralBlockPanelKernel.h:1388
Definition GeneralBlockPanelKernel.h:1191
EIGEN_STRONG_INLINE void operator()(const DataMapper &res, const LhsScalar *blockA, const RhsScalar *blockB, ResScalar alpha, Index peelStart, Index peelEnd, Index strideA, Index strideB, Index offsetA, Index offsetB, int prefetch_res_offset, Index peeled_kc, Index pk, Index cols, Index depth, Index packet_cols4)
Definition GeneralBlockPanelKernel.h:1210
GEBPTraits::RhsPacketx4 RhsPacketx4
Definition GeneralBlockPanelKernel.h:1192
EIGEN_STRONG_INLINE void peeled_kc_onestep(Index K, const LhsScalar *blA, const RhsScalar *blB, GEBPTraits traits, LhsPacket *A0, RhsPacketx4 *rhs_panel, RhsPacket *T0, AccPacket *C0, AccPacket *C1, AccPacket *C2, AccPacket *C3)
Definition GeneralBlockPanelKernel.h:1194
T1 type
Definition GeneralBlockPanelKernel.h:374
T2 type
Definition GeneralBlockPanelKernel.h:377
Definition GeneralBlockPanelKernel.h:371
T3 type
Definition GeneralBlockPanelKernel.h:371
Definition GenericPacketMath.h:107
Definition ForwardDeclarations.h:17
DoublePacket< typename unpacket_traits< Packet >::half > half
Definition GeneralBlockPanelKernel.h:746
Definition GenericPacketMath.h:133
T half
Definition GenericPacketMath.h:135
Definition datatypes.h:12
Definition PacketMath.h:47