10#ifndef EIGEN_SPARSELU_GEMM_KERNEL_H
11#define EIGEN_SPARSELU_GEMM_KERNEL_H
24template<
typename Scalar>
36 RK = NumberOfRegisters>=16 ? 4 : 2,
42 Index i0 = internal::first_default_aligned(
A,
m);
52 for(
Index k=0; k<d; ++k)
107#define KMADD(c, a, b, tmp) {tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);}
109 c0 = pload<Packet>(C0+i+(I)*PacketSize); \
110 c1 = pload<Packet>(C1+i+(I)*PacketSize); \
111 KMADD(c0, a0, b00, t0) \
112 KMADD(c1, a0, b01, t1) \
113 a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
114 KMADD(c0, a1, b10, t0) \
115 KMADD(c1, a1, b11, t1) \
116 a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
117 if(RK==4){ KMADD(c0, a2, b20, t0) }\
118 if(RK==4){ KMADD(c1, a2, b21, t1) }\
119 if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
120 if(RK==4){ KMADD(c0, a3, b30, t0) }\
121 if(RK==4){ KMADD(c1, a3, b31, t1) }\
122 if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
123 pstore(C0+i+(I)*PacketSize, c0); \
124 pstore(C1+i+(I)*PacketSize, c1)
207 c0 = pload<Packet>(C0+i+(I)*PacketSize); \
208 KMADD(c0, a0, b00, t0) \
209 a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
210 KMADD(c0, a1, b10, t0) \
211 a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
212 if(RK==4){ KMADD(c0, a2, b20, t0) }\
213 if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
214 if(RK==4){ KMADD(c0, a3, b30, t0) }\
215 if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
216 pstore(C0+i+(I)*PacketSize, c0);
257 Alignment = PacketSize>1 ?
Aligned : 0
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
Definition PacketMath.h:27
Matrix3f m
Definition AngleAxis_mimic_euler.cpp:1
int n
Definition BiCGSTAB_simple.cpp:1
int i
Definition BiCGSTAB_step_by_step.cpp:9
#define EIGEN_ASM_COMMENT(X)
Definition Macros.h:1082
#define eigen_internal_assert(x)
Definition Macros.h:1043
#define EIGEN_DONT_INLINE
Definition Macros.h:940
Scalar Scalar * c
Definition benchVecAdd.cpp:17
SCALAR Scalar
Definition bench_gemm.cpp:46
Matrix< SCALARB, Dynamic, Dynamic, opt_B > B
Definition bench_gemm.cpp:49
The matrix class, also used for vectors and row-vectors.
Definition Matrix.h:180
* lda
Definition eigenvalues.cpp:59
@ Aligned
Definition Constants.h:240
Definition bench_norm.cpp:86
EIGEN_DONT_INLINE void sparselu_gemm(Index m, Index n, Index d, const Scalar *A, Index lda, const Scalar *B, Index ldb, Scalar *C, Index ldc)
Definition SparseLU_gemm_kernel.h:26
EIGEN_DEVICE_FUNC void prefetch(const Scalar *addr)
Definition GenericPacketMath.h:719
Namespace containing all symbols from the Eigen library.
Definition bench_norm.cpp:85
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition Meta.h:74
Definition BandTriangularSolver.h:13
Definition GenericPacketMath.h:107
Definition ForwardDeclarations.h:17
std::ptrdiff_t j
Definition tut_arithmetic_redux_minmax.cpp:2
Definition PacketMath.h:47