10#ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
11#define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
24template<DenseIndex DimId,
typename XprType>
27 typedef typename XprType::Scalar
Scalar;
30 typedef typename XprTraits::Index
Index;
31 typedef typename XprType::Nested
Nested;
33 static const int NumDimensions = XprTraits::NumDimensions - 1;
34 static const int Layout = XprTraits::Layout;
38template<DenseIndex DimId,
typename XprType>
44template<DenseIndex DimId,
typename XprType>
50template <DenseIndex DimId>
79template<DenseIndex DimId,
typename XprType>
114template<DenseIndex DimId,
typename ArgType,
typename Device>
119 static const int NumDims = NumInputDims-1;
138 IsOuterChipping = (
static_cast<int>(
Layout) ==
ColMajor && DimId == NumInputDims - 1) ||
141 IsInnerChipping = (
static_cast<int>(
Layout) ==
ColMajor && DimId == 0) ||
168 : m_impl(op.expression(), device), m_dim(op.dim()),
m_device(device)
177 for (
int i = 0;
i < NumInputDims; ++
i) {
178 if (
i != m_dim.actualDim()) {
179 m_dimensions[
j] = input_dims[
i];
187 for (
int i = 0;
i < m_dim.actualDim(); ++
i) {
188 m_stride *= input_dims[
i];
189 m_inputStride *= input_dims[
i];
192 for (
int i = NumInputDims-1;
i > m_dim.actualDim(); --
i) {
193 m_stride *= input_dims[
i];
194 m_inputStride *= input_dims[
i];
197 m_inputStride *= input_dims[m_dim.actualDim()];
198 m_inputOffset = m_stride * op.
offset();
204 m_impl.evalSubExprsIfNeeded(NULL);
214 return m_impl.coeff(srcCoeff(index));
217 template<
int LoadMode>
223 if (isInnerChipping()) {
226 Index inputIndex = index * m_inputStride + m_inputOffset;
230 values[
i] = m_impl.coeff(inputIndex);
231 inputIndex += m_inputStride;
235 }
else if (isOuterChipping()) {
238 return m_impl.template packet<LoadMode>(index + m_inputOffset);
240 const Index idx = index / m_stride;
241 const Index rem = index - idx * m_stride;
243 Index inputIndex = idx * m_inputStride + m_inputOffset + rem;
244 return m_impl.template packet<LoadMode>(inputIndex);
263 m_dim.actualDim() == 0) ||
265 m_dim.actualDim() == NumInputDims - 1)) {
266 cost += TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
267 }
else if ((
static_cast<int>(
Layout) ==
static_cast<int>(
ColMajor) &&
268 m_dim.actualDim() == NumInputDims - 1) ||
270 m_dim.actualDim() == 0)) {
271 cost += TensorOpCost::AddCost<Index>();
273 cost += 3 * TensorOpCost::MulCost<Index>() + TensorOpCost::DivCost<Index>() +
274 3 * TensorOpCost::AddCost<Index>();
277 return m_impl.costPerCoeff(vectorized) +
283 const size_t target_size =
m_device.lastLevelCacheSize();
285 internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
286 m_impl.getResourceRequirements());
291 bool root_of_expr_ast =
false)
const {
292 const Index chip_dim = m_dim.actualDim();
295 for (
int i = 0;
i < NumInputDims; ++
i) {
307 for (
int i = 0;
i < NumInputDims; ++
i) {
308 arg_destination_strides[
i]
314 arg_desc.template AddDestinationBuffer<Layout>(
316 arg_destination_strides);
319 ArgTensorBlock arg_block = m_impl.block(arg_desc, scratch, root_of_expr_ast);
322 if (arg_block.data() != NULL) {
324 return TensorBlock(arg_block.kind(), arg_block.data(),
331 const typename TensorBlock::Storage block_storage =
332 TensorBlock::prepareStorage(desc, scratch);
336 TensorBlockAssignment;
338 TensorBlockAssignment::Run(
339 TensorBlockAssignment::target(
340 arg_desc.dimensions(),
342 block_storage.data()),
345 return block_storage.AsTensorMaterializedBlock();
351 if (isOuterChipping() && result) {
352 return result + m_inputOffset;
368 if (isInnerChipping()) {
371 inputIndex = index * m_inputStride + m_inputOffset;
372 }
else if (isOuterChipping()) {
376 inputIndex = index + m_inputOffset;
378 const Index idx = index / m_stride;
379 inputIndex = idx * m_inputStride + m_inputOffset;
380 index -= idx * m_stride;
387 return IsInnerChipping ||
388 (
static_cast<int>(
Layout) ==
ColMajor && m_dim.actualDim() == 0) ||
389 (
static_cast<int>(
Layout) ==
RowMajor && m_dim.actualDim() == NumInputDims - 1);
393 return IsOuterChipping ||
394 (
static_cast<int>(
Layout) ==
ColMajor && m_dim.actualDim() == NumInputDims-1) ||
409template<DenseIndex DimId,
typename ArgType,
typename Device>
411 :
public TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
416 static const int NumDims = NumInputDims-1;
442 return this->m_impl.coeffRef(this->srcCoeff(index));
450 if (this->isInnerChipping()) {
455 Index inputIndex = index * this->m_inputStride + this->m_inputOffset;
458 this->m_impl.coeffRef(inputIndex) = values[
i];
459 inputIndex += this->m_inputStride;
461 }
else if (this->isOuterChipping()) {
464 this->m_impl.template writePacket<StoreMode>(index + this->m_inputOffset,
x);
466 const Index idx = index / this->m_stride;
467 const Index rem = index - idx * this->m_stride;
468 if (rem + PacketSize <= this->m_stride) {
469 const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem;
470 this->m_impl.template writePacket<StoreMode>(inputIndex,
x);
484 template <
typename TensorBlock>
487 assert(this->m_impl.data() != NULL);
489 const Index chip_dim = this->m_dim.actualDim();
492 for (
int i = 0;
i < NumInputDims; ++
i) {
499 const typename TensorBlock::XprType>
503 TensorBlockExpr,
Index>
506 TensorBlockAssign::Run(
507 TensorBlockAssign::target(
510 this->m_impl.data(), this->srcCoeff(desc.
offset())),
511 block.expr().reshape(input_block_dims));
int i
Definition BiCGSTAB_step_by_step.cpp:9
#define EIGEN_UNROLL_LOOP
Definition Macros.h:1461
#define EIGEN_UNUSED_VARIABLE(var)
Definition Macros.h:1076
#define EIGEN_DEVICE_FUNC
Definition Macros.h:976
#define eigen_assert(x)
Definition Macros.h:1037
#define EIGEN_STRONG_INLINE
Definition Macros.h:917
#define EIGEN_STATIC_ASSERT(CONDITION, MSG)
Definition StaticAssert.h:127
#define EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(Derived)
Definition TensorMacros.h:94
#define EIGEN_DEVICE_REF
Definition TensorMacros.h:50
Generic expression where a coefficient-wise binary operator is applied to two expressions.
Definition CwiseBinaryOp.h:84
The tensor base class.
Definition TensorBase.h:973
Definition TensorChipping.h:81
Eigen::internal::traits< TensorChippingOp >::Index Index
Definition TensorChipping.h:89
TensorBase< TensorChippingOp< DimId, XprType > > Base
Definition TensorChipping.h:83
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType &expr, const Index offset, const Index dim)
Definition TensorChipping.h:91
Eigen::internal::traits< TensorChippingOp >::StorageKind StorageKind
Definition TensorChipping.h:88
Eigen::internal::nested< TensorChippingOp >::type Nested
Definition TensorChipping.h:87
XprType::Nested m_xpr
Definition TensorChipping.h:107
Eigen::internal::traits< TensorChippingOp >::Scalar Scalar
Definition TensorChipping.h:84
const internal::DimensionId< DimId > m_dim
Definition TensorChipping.h:109
Eigen::NumTraits< Scalar >::Real RealScalar
Definition TensorChipping.h:85
EIGEN_DEVICE_FUNC const internal::remove_all< typename XprType::Nested >::type & expression() const
Definition TensorChipping.h:102
const Index m_offset
Definition TensorChipping.h:108
EIGEN_DEVICE_FUNC const Index offset() const
Definition TensorChipping.h:96
EIGEN_DEVICE_FUNC const Index dim() const
Definition TensorChipping.h:98
XprType::CoeffReturnType CoeffReturnType
Definition TensorChipping.h:86
Definition TensorCostModel.h:25
Definition TensorMorphing.h:55
Definition TensorBlock.h:1381
const DestinationBuffer & destination() const
Definition TensorBlock.h:303
IndexType offset() const
Definition TensorBlock.h:298
bool HasDestinationBuffer() const
Definition TensorBlock.h:326
IndexType dimension(int index) const
Definition TensorBlock.h:300
const Dimensions & dimensions() const
Definition TensorBlock.h:299
TensorBlockDescriptor & DropDestinationBuffer()
Definition TensorBlock.h:320
Definition TensorRef.h:81
Definition TensorBlock.h:656
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy x
Definition gnuplot_common_settings.hh:12
@ ColMajor
Definition Constants.h:319
@ RowMajor
Definition Constants.h:321
Namespace containing all symbols from the Eigen library.
Definition bench_norm.cpp:85
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T * constCast(const T *data)
Definition TensorForwardDeclarations.h:27
EIGEN_DEFAULT_DENSE_INDEX_TYPE DenseIndex
Definition Meta.h:66
const int Dynamic
Definition Constants.h:22
Definition BandTriangularSolver.h:13
Definition TensorDimensions.h:263
Definition Constants.h:507
Definition TensorMeta.h:50
Definition TensorForwardDeclarations.h:37
PacketType< CoeffReturnType, Device >::type PacketReturnType
Definition TensorChipping.h:421
DSizes< Index, NumDims > Dimensions
Definition TensorChipping.h:418
TensorEvaluator< const TensorChippingOp< DimId, ArgType >, Device > Base
Definition TensorChipping.h:413
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(const TensorBlockDesc &desc, const TensorBlock &block)
Definition TensorChipping.h:485
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType & coeffRef(Index index)
Definition TensorChipping.h:440
internal::TensorBlockDescriptor< NumDims, Index > TensorBlockDesc
Definition TensorChipping.h:433
XprType::CoeffReturnType CoeffReturnType
Definition TensorChipping.h:420
TensorChippingOp< DimId, ArgType > XprType
Definition TensorChipping.h:414
EIGEN_STRONG_INLINE TensorEvaluator(const XprType &op, const Device &device)
Definition TensorChipping.h:436
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType &x)
Definition TensorChipping.h:446
XprType::Index Index
Definition TensorChipping.h:417
XprType::Scalar Scalar
Definition TensorChipping.h:419
Definition TensorChipping.h:116
Storage::Type EvaluatorPointerType
Definition TensorChipping.h:127
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
Definition TensorChipping.h:218
const Device EIGEN_DEVICE_REF m_device
Definition TensorChipping.h:404
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
Definition TensorChipping.h:365
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
Definition TensorChipping.h:212
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition TensorChipping.h:201
Index m_inputStride
Definition TensorChipping.h:401
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isOuterChipping() const
Definition TensorChipping.h:392
TensorEvaluator< ArgType, Device > m_impl
Definition TensorChipping.h:402
XprType::Scalar Scalar
Definition TensorChipping.h:122
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isInnerChipping() const
Definition TensorChipping.h:386
internal::TensorBlockDescriptor< NumDims, Index > TensorBlockDesc
Definition TensorChipping.h:154
TensorEvaluator< const ArgType, Device >::TensorBlock ArgTensorBlock
Definition TensorChipping.h:160
TensorChippingOp< DimId, ArgType > XprType
Definition TensorChipping.h:117
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const
Definition TensorChipping.h:282
StorageMemory< CoeffReturnType, Device > Storage
Definition TensorChipping.h:126
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const
Definition TensorChipping.h:260
Dimensions m_dimensions
Definition TensorChipping.h:398
XprType::Index Index
Definition TensorChipping.h:120
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType)
Definition TensorChipping.h:203
PacketType< CoeffReturnType, Device >::type PacketReturnType
Definition TensorChipping.h:124
internal::TensorBlockScratchAllocator< Device > TensorBlockScratch
Definition TensorChipping.h:155
internal::TensorMaterializedBlock< ScalarNoConst, NumDims, Layout, Index > TensorBlock
Definition TensorChipping.h:164
EIGEN_STRONG_INLINE TensorEvaluator(const XprType &op, const Device &device)
Definition TensorChipping.h:167
internal::TensorBlockDescriptor< NumInputDims, Index > ArgTensorBlockDesc
Definition TensorChipping.h:158
EIGEN_STRONG_INLINE void cleanup()
Definition TensorChipping.h:208
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool root_of_expr_ast=false) const
Definition TensorChipping.h:290
XprType::CoeffReturnType CoeffReturnType
Definition TensorChipping.h:123
DSizes< Index, NumDims > Dimensions
Definition TensorChipping.h:121
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Storage::Type data() const
Definition TensorChipping.h:349
const internal::DimensionId< DimId > m_dim
Definition TensorChipping.h:403
Index m_stride
Definition TensorChipping.h:399
internal::remove_const< Scalar >::type ScalarNoConst
Definition TensorChipping.h:151
Index m_inputOffset
Definition TensorChipping.h:400
A cost model used to limit the number of threads used for evaluating a tensor expression.
Definition TensorEvaluator.h:29
Derived::Scalar Scalar
Definition TensorEvaluator.h:31
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType & coeffRef(Index index)
Definition TensorEvaluator.h:99
const Device EIGEN_DEVICE_REF m_device
Definition TensorEvaluator.h:192
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition TensorEvaluator.h:73
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
Definition TensorEvaluator.h:94
@ BlockAccess
Definition TensorEvaluator.h:48
@ PreferBlockAccess
Definition TensorEvaluator.h:49
@ PacketAccess
Definition TensorEvaluator.h:47
@ Layout
Definition TensorEvaluator.h:50
@ IsAligned
Definition TensorEvaluator.h:46
internal::TensorMaterializedBlock< ScalarNoConst, NumCoords, Layout, Index > TensorBlock
Definition TensorEvaluator.h:63
Derived::Dimensions Dimensions
Definition TensorEvaluator.h:34
static const int PacketSize
Definition TensorEvaluator.h:36
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool=false) const
Definition TensorEvaluator.h:158
internal::remove_const< Scalar >::type ScalarNoConst
Definition TensorEvaluator.h:55
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const
Definition TensorChipping.h:67
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim)
Definition TensorChipping.h:64
Definition TensorChipping.h:52
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim)
Definition TensorChipping.h:53
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const
Definition TensorChipping.h:57
Definition TensorBlock.h:75
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlockResourceRequirements merge(const TensorBlockResourceRequirements &lhs, const TensorBlockResourceRequirements &rhs)
Definition TensorBlock.h:138
const TensorChippingOp< DimId, XprType > EIGEN_DEVICE_REF type
Definition TensorChipping.h:41
Definition XprHelper.h:332
TensorChippingOp< DimId, XprType > type
Definition TensorChipping.h:47
Definition TensorTraits.h:175
XprTraits::StorageKind StorageKind
Definition TensorChipping.h:29
XprType::Nested Nested
Definition TensorChipping.h:31
XprTraits::PointerType PointerType
Definition TensorChipping.h:35
traits< XprType > XprTraits
Definition TensorChipping.h:28
remove_reference< Nested >::type _Nested
Definition TensorChipping.h:32
XprTraits::Index Index
Definition TensorChipping.h:30
XprType::Scalar Scalar
Definition TensorChipping.h:27
Definition ForwardDeclarations.h:17
std::ptrdiff_t j
Definition tut_arithmetic_redux_minmax.cpp:2