10#ifndef EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
11#define EIGEN_CXX11_TENSOR_TENSOR_SHUFFLING_H
23template<
typename Shuffle,
typename XprType>
26 typedef typename XprType::Scalar
Scalar;
29 typedef typename XprTraits::Index
Index;
30 typedef typename XprType::Nested
Nested;
32 static const int NumDimensions = XprTraits::NumDimensions;
33 static const int Layout = XprTraits::Layout;
37template<
typename Shuffle,
typename XprType>
43template<
typename Shuffle,
typename XprType>
53template<
typename Shuffle,
typename XprType>
85template<
typename Shuffle,
typename ArgType,
typename Device>
123 m_impl(op.expression(), device)
127 m_is_identity =
true;
128 for (
int i = 0;
i < NumDims; ++
i) {
129 m_shuffle[
i] =
static_cast<int>(shuffle[
i]);
130 m_dimensions[
i] = input_dims[shuffle[
i]];
131 m_inverseShuffle[shuffle[
i]] =
i;
132 if (m_is_identity && shuffle[
i] !=
i) {
133 m_is_identity =
false;
138 m_unshuffledInputStrides[0] = 1;
139 m_outputStrides[0] = 1;
141 for (
int i = 1;
i < NumDims; ++
i) {
142 m_unshuffledInputStrides[
i] =
143 m_unshuffledInputStrides[
i - 1] * input_dims[
i - 1];
144 m_outputStrides[
i] = m_outputStrides[
i - 1] * m_dimensions[
i - 1];
146 m_outputStrides[
i] > 0 ? m_outputStrides[
i] :
Index(1));
149 m_unshuffledInputStrides[NumDims - 1] = 1;
150 m_outputStrides[NumDims - 1] = 1;
151 for (
int i = NumDims - 2;
i >= 0; --
i) {
152 m_unshuffledInputStrides[
i] =
153 m_unshuffledInputStrides[
i + 1] * input_dims[
i + 1];
154 m_outputStrides[
i] = m_outputStrides[
i + 1] * m_dimensions[
i + 1];
156 m_outputStrides[
i] > 0 ? m_outputStrides[
i] :
Index(1));
160 for (
int i = 0;
i < NumDims; ++
i) {
161 m_inputStrides[
i] = m_unshuffledInputStrides[shuffle[
i]];
168 m_impl.evalSubExprsIfNeeded(NULL);
172#ifdef EIGEN_USE_THREADS
173 template <
typename EvalSubExprsCallback>
176 m_impl.evalSubExprsIfNeededAsync(
nullptr, [done](
bool) { done(
true); });
187 return m_impl.coeff(index);
189 return m_impl.coeff(srcCoeff(index));
193 template <
int LoadMode,
typename Self,
bool ImplPacketAccess>
194 struct PacketLoader {
199 for (
int i = 0;
i < PacketSize; ++
i) {
200 values[
i] = self.
coeff(index +
i);
207 template<
int LoadMode,
typename Self>
208 struct PacketLoader<LoadMode,
Self, true> {
212 return self.
m_impl.template packet<LoadMode>(index);
216 for (
int i = 0;
i < PacketSize; ++
i) {
217 values[
i] = self.
coeff(index +
i);
225 template<
int LoadMode>
230 return PacketLoader<LoadMode, Self, TensorEvaluator<ArgType, Device>::PacketAccess>::Run(*
this, index);
235 static const int inner_dim =
238 const size_t target_size =
m_device.firstLevelCacheSize();
239 const bool inner_dim_shuffled = m_shuffle[inner_dim] != inner_dim;
247 if (inner_dim_shuffled) {
248 return BlockRequirements::uniform<Scalar>(target_size)
251 return BlockRequirements::skewed<Scalar>(target_size);
257 bool root_of_expr_ast =
false)
const {
258 assert(m_impl.data() != NULL);
262 typedef typename TensorBlockIO::Dst TensorBlockIODst;
263 typedef typename TensorBlockIO::Src TensorBlockIOSrc;
265 const typename TensorBlock::Storage block_storage =
266 TensorBlock::prepareStorage(
267 desc, scratch, root_of_expr_ast);
269 typename TensorBlockIO::Dimensions input_strides(m_unshuffledInputStrides);
270 TensorBlockIOSrc src(input_strides, m_impl.data(), srcCoeff(desc.
offset()));
272 TensorBlockIODst dst(block_storage.dimensions(), block_storage.strides(),
273 block_storage.data());
275 typename TensorBlockIO::DimensionsMap dst_to_src_dim_map(m_shuffle);
276 TensorBlockIO::Copy(dst, src, dst_to_src_dim_map);
278 return block_storage.AsTensorMaterializedBlock();
282 const double compute_cost = m_is_identity ? TensorOpCost::AddCost<Index>() :
283 NumDims * (2 * TensorOpCost::AddCost<Index>() +
284 2 * TensorOpCost::MulCost<Index>() +
285 TensorOpCost::DivCost<Index>());
286 return m_impl.costPerCoeff(vectorized) +
304 Index output_index = 0;
306 for (
int i = NumDims - 1;
i > 0; --
i) {
307 const Index idx = input_index / fast_input_block_strides[
i];
308 output_index += idx * output_block_strides[m_inverseShuffle[
i]];
309 input_index -= idx * input_block_strides[
i];
311 return output_index + input_index *
312 output_block_strides[m_inverseShuffle[0]];
314 for (
int i = 0;
i < NumDims - 1; ++
i) {
315 const Index idx = input_index / fast_input_block_strides[
i];
316 output_index += idx * output_block_strides[m_inverseShuffle[
i]];
317 input_index -= idx * input_block_strides[
i];
319 return output_index + input_index *
320 output_block_strides[m_inverseShuffle[NumDims - 1]];
325 Index inputIndex = 0;
327 for (
int i = NumDims - 1;
i > 0; --
i) {
328 const Index idx = index / m_fastOutputStrides[
i];
329 inputIndex += idx * m_inputStrides[
i];
330 index -= idx * m_outputStrides[
i];
332 return inputIndex + index * m_inputStrides[0];
334 for (
int i = 0;
i < NumDims - 1; ++
i) {
335 const Index idx = index / m_fastOutputStrides[
i];
336 inputIndex += idx * m_inputStrides[
i];
337 index -= idx * m_outputStrides[
i];
339 return inputIndex + index * m_inputStrides[NumDims - 1];
358template<
typename Shuffle,
typename ArgType,
typename Device>
360 :
public TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
394 return this->m_impl.coeffRef(this->srcCoeff(index));
410 template <
typename TensorBlock>
417 typedef typename TensorBlockIO::Dst TensorBlockIODst;
418 typedef typename TensorBlockIO::Src TensorBlockIOSrc;
425 if (block_buffer == NULL) {
431 TensorBlockAssignment;
433 TensorBlockAssignment::Run(
434 TensorBlockAssignment::target(
447 typename TensorBlockIO::Dimensions output_strides(
448 this->m_unshuffledInputStrides);
449 typename TensorBlockIO::Dimensions output_dimensions;
450 for (
int i = 0;
i < NumDims; ++
i) {
451 output_dimensions[this->m_shuffle[
i]] = desc.
dimension(
i);
453 TensorBlockIODst dst(output_dimensions, output_strides, this->m_impl.data(),
454 this->srcCoeff(desc.
offset()));
457 typename TensorBlockIO::DimensionsMap dst_to_src_dim_map;
458 for (
int i = 0;
i < NumDims; ++
i) {
459 dst_to_src_dim_map[
i] =
static_cast<int>(this->m_inverseShuffle[
i]);
461 TensorBlockIO::Copy(dst, src, dst_to_src_dim_map);
464 if (mem != NULL) this->
m_device.deallocate(mem);
int i
Definition BiCGSTAB_step_by_step.cpp:9
#define EIGEN_UNROLL_LOOP
Definition Macros.h:1461
#define EIGEN_DEVICE_FUNC
Definition Macros.h:976
#define eigen_assert(x)
Definition Macros.h:1037
#define EIGEN_STRONG_INLINE
Definition Macros.h:917
#define EIGEN_STATIC_ASSERT(CONDITION, MSG)
Definition StaticAssert.h:127
#define EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(Derived)
Definition TensorMacros.h:94
#define EIGEN_DEVICE_REF
Definition TensorMacros.h:50
Generic expression where a coefficient-wise binary operator is applied to two expressions.
Definition CwiseBinaryOp.h:84
The tensor base class.
Definition TensorBase.h:973
Definition TensorCostModel.h:25
Definition TensorShuffling.h:55
EIGEN_DEVICE_FUNC const Shuffle & shufflePermutation() const
Definition TensorShuffling.h:69
XprType::Nested m_xpr
Definition TensorShuffling.h:79
Eigen::internal::nested< TensorShufflingOp >::type Nested
Definition TensorShuffling.h:61
EIGEN_DEVICE_FUNC const internal::remove_all< typenameXprType::Nested >::type & expression() const
Definition TensorShuffling.h:73
Eigen::internal::traits< TensorShufflingOp >::StorageKind StorageKind
Definition TensorShuffling.h:62
XprType::CoeffReturnType CoeffReturnType
Definition TensorShuffling.h:60
TensorBase< TensorShufflingOp< Shuffle, XprType > > Base
Definition TensorShuffling.h:57
Eigen::NumTraits< Scalar >::Real RealScalar
Definition TensorShuffling.h:59
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType &expr, const Shuffle &shfl)
Definition TensorShuffling.h:65
Eigen::internal::traits< TensorShufflingOp >::Scalar Scalar
Definition TensorShuffling.h:58
const Shuffle m_shuffle
Definition TensorShuffling.h:80
Eigen::internal::traits< TensorShufflingOp >::Index Index
Definition TensorShuffling.h:63
Definition EmulateArray.h:21
Definition TensorBlock.h:1381
IndexType size() const
Definition TensorBlock.h:301
IndexType offset() const
Definition TensorBlock.h:298
IndexType dimension(int index) const
Definition TensorBlock.h:300
const Dimensions & dimensions() const
Definition TensorBlock.h:299
Definition TensorRef.h:81
Definition TensorBlock.h:656
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy x
Definition gnuplot_common_settings.hh:12
@ ColMajor
Definition Constants.h:319
Namespace containing all symbols from the Eigen library.
Definition bench_norm.cpp:85
Definition BandTriangularSolver.h:13
Definition TensorDimensions.h:263
Definition Constants.h:507
Definition TensorMeta.h:50
Definition TensorForwardDeclarations.h:37
XprType::CoeffReturnType CoeffReturnType
Definition TensorShuffling.h:369
XprType::Scalar Scalar
Definition TensorShuffling.h:368
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType & coeffRef(Index index)
Definition TensorShuffling.h:392
PacketType< CoeffReturnType, Device >::type PacketReturnType
Definition TensorShuffling.h:370
EIGEN_STRONG_INLINE TensorEvaluator(const XprType &op, const Device &device)
Definition TensorShuffling.h:388
XprType::Index Index
Definition TensorShuffling.h:365
EIGEN_STRONG_INLINE void writePacket(Index index, const PacketReturnType &x)
Definition TensorShuffling.h:398
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(const TensorBlockDesc &desc, const TensorBlock &block)
Definition TensorShuffling.h:411
TensorShufflingOp< Shuffle, ArgType > XprType
Definition TensorShuffling.h:364
DSizes< Index, NumDims > Dimensions
Definition TensorShuffling.h:367
TensorEvaluator< const TensorShufflingOp< Shuffle, ArgType >, Device > Base
Definition TensorShuffling.h:362
internal::TensorBlockDescriptor< NumDims, Index > TensorBlockDesc
Definition TensorShuffling.h:385
internal::remove_const< Scalar >::type ScalarNoConst
Definition TensorShuffling.h:382
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE PacketReturnType Run(const Self &self, Index index)
Definition TensorShuffling.h:210
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE PacketReturnType Run(const Self &self, Index index)
Definition TensorShuffling.h:196
Definition TensorShuffling.h:87
DSizes< Index, NumDims > Dimensions
Definition TensorShuffling.h:92
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const
Definition TensorShuffling.h:281
XprType::Scalar Scalar
Definition TensorShuffling.h:93
array< Index, NumDims > m_unshuffledInputStrides
Definition TensorShuffling.h:350
array< int, NumDims > m_shuffle
Definition TensorShuffling.h:345
bool m_is_identity
Definition TensorShuffling.h:344
XprType::Index Index
Definition TensorShuffling.h:90
internal::remove_const< Scalar >::type ScalarNoConst
Definition TensorShuffling.h:110
array< internal::TensorIntDivisor< Index >, NumDims > m_fastOutputStrides
Definition TensorShuffling.h:348
TensorShufflingOp< Shuffle, ArgType > XprType
Definition TensorShuffling.h:89
TensorEvaluator< const TensorShufflingOp< Shuffle, ArgType >, Device > Self
Definition TensorShuffling.h:88
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const
Definition TensorShuffling.h:234
Storage::Type EvaluatorPointerType
Definition TensorShuffling.h:98
EIGEN_STRONG_INLINE TensorEvaluator(const XprType &op, const Device &device)
Definition TensorShuffling.h:121
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
Definition TensorShuffling.h:184
TensorEvaluator< ArgType, Device > m_impl
Definition TensorShuffling.h:353
EIGEN_DEVICE_FUNC Storage::Type data() const
Definition TensorShuffling.h:290
internal::TensorBlockScratchAllocator< Device > TensorBlockScratch
Definition TensorShuffling.h:114
array< Index, NumDims > m_inverseShuffle
Definition TensorShuffling.h:346
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index GetBlockOutputIndex(Index input_index, const DSizes< Index, NumDims > &input_block_strides, const DSizes< Index, NumDims > &output_block_strides, const DSizes< internal::TensorIntDivisor< Index >, NumDims > &fast_input_block_strides) const
Definition TensorShuffling.h:299
internal::TensorBlockDescriptor< NumDims, Index > TensorBlockDesc
Definition TensorShuffling.h:113
array< Index, NumDims > m_outputStrides
Definition TensorShuffling.h:347
EIGEN_STRONG_INLINE void cleanup()
Definition TensorShuffling.h:180
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
Definition TensorShuffling.h:226
StorageMemory< CoeffReturnType, Device > Storage
Definition TensorShuffling.h:97
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition TensorShuffling.h:165
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool root_of_expr_ast=false) const
Definition TensorShuffling.h:256
const Device EIGEN_DEVICE_REF m_device
Definition TensorShuffling.h:352
internal::TensorMaterializedBlock< ScalarNoConst, NumDims, Layout, Index > TensorBlock
Definition TensorShuffling.h:118
PacketType< CoeffReturnType, Device >::type PacketReturnType
Definition TensorShuffling.h:95
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType)
Definition TensorShuffling.h:167
Dimensions m_dimensions
Definition TensorShuffling.h:343
XprType::CoeffReturnType CoeffReturnType
Definition TensorShuffling.h:94
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
Definition TensorShuffling.h:324
array< Index, NumDims > m_inputStrides
Definition TensorShuffling.h:349
A cost model used to limit the number of threads used for evaluating tensor expression.
Definition TensorEvaluator.h:29
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType & coeffRef(Index index)
Definition TensorEvaluator.h:99
const Device EIGEN_DEVICE_REF m_device
Definition TensorEvaluator.h:192
Storage::Type EvaluatorPointerType
Definition TensorEvaluator.h:39
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition TensorEvaluator.h:73
@ BlockAccess
Definition TensorEvaluator.h:48
@ PreferBlockAccess
Definition TensorEvaluator.h:49
@ PacketAccess
Definition TensorEvaluator.h:47
@ Layout
Definition TensorEvaluator.h:50
@ IsAligned
Definition TensorEvaluator.h:46
Derived::Index Index
Definition TensorEvaluator.h:30
internal::TensorMaterializedBlock< ScalarNoConst, NumCoords, Layout, Index > TensorBlock
Definition TensorEvaluator.h:63
Derived::Dimensions Dimensions
Definition TensorEvaluator.h:34
static const int PacketSize
Definition TensorEvaluator.h:36
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool=false) const
Definition TensorEvaluator.h:158
internal::remove_const< Scalar >::type ScalarNoConst
Definition TensorEvaluator.h:55
Definition TensorBlock.h:75
EIGEN_DEVICE_FUNC TensorBlockResourceRequirements & addCostPerCoeff(TensorOpCost cost)
Definition TensorBlock.h:145
const TensorShufflingOp< Shuffle, XprType > & type
Definition TensorShuffling.h:40
Definition XprHelper.h:332
TensorShufflingOp< Shuffle, XprType > type
Definition TensorShuffling.h:46
Definition TensorTraits.h:175
XprTraits::Index Index
Definition TensorShuffling.h:29
XprType::Nested Nested
Definition TensorShuffling.h:30
XprTraits::PointerType PointerType
Definition TensorShuffling.h:34
XprTraits::StorageKind StorageKind
Definition TensorShuffling.h:28
XprType::Scalar Scalar
Definition TensorShuffling.h:26
traits< XprType > XprTraits
Definition TensorShuffling.h:27
remove_reference< Nested >::type _Nested
Definition TensorShuffling.h:31
Definition ForwardDeclarations.h:17