TR-mbed 1.0
Loading...
Searching...
No Matches
TensorConversion.h
Go to the documentation of this file.
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
5//
6// This Source Code Form is subject to the terms of the Mozilla
7// Public License v. 2.0. If a copy of the MPL was not distributed
8// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
11#define EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
12
13namespace Eigen {
14
22namespace internal {
// Expression traits for TensorConversionOp<TargetType, XprType>.
// Only the scalar type changes; everything else is forwarded from XprType.
23template<typename TargetType, typename XprType>
24struct traits<TensorConversionOp<TargetType, XprType> >
25{
26 // The scalar type of the conversion expression is the requested target type.
27 typedef TargetType Scalar;
// Index type and nested type are taken unchanged from the operand expression.
29 typedef typename traits<XprType>::Index Index;
30 typedef typename XprType::Nested Nested;
// Number of dimensions and storage layout are inherited from the operand as well.
32 static const int NumDimensions = traits<XprType>::NumDimensions;
33 static const int Layout = traits<XprType>::Layout;
// No expression flags are set for a conversion.
34 enum { Flags = 0 };
36};
37
38template<typename TargetType, typename XprType>
43
44template<typename TargetType, typename XprType>
49
50} // end namespace internal
51
52
53template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio>
55
// PacketConverter specialization for SrcCoeffRatio == 1 and TgtCoeffRatio == 1:
// one source packet is read from the wrapped evaluator and cast to one target packet.
56template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
57struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, 1> {
// The constructor only stores a reference to the evaluator it is given.
60 : m_impl(impl) {}
61
62 template<int LoadMode, typename Index>
// Read the packet at `index` using the caller's LoadMode and convert it with pcast.
64 return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<LoadMode>(index));
65 }
66
67 private:
// Evaluator that supplies the source packets; held by const reference.
68 const TensorEvaluator& m_impl;
69};
70
71
// PacketConverter specialization for SrcCoeffRatio == 2 and TgtCoeffRatio == 1:
// two source packets are combined into a single target packet.
72template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
73struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 2, 1> {
// The constructor only stores a reference to the evaluator it is given.
76 : m_impl(impl) {}
77
78 template<int LoadMode, typename Index>
// Number of coefficients held by one source packet; used as the stride between loads.
80 const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
81
// Load two back-to-back source packets starting at `index`, then cast both at once.
82 SrcPacket src1 = m_impl.template packet<LoadMode>(index);
83 SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
84 TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2);
85 return result;
86 }
87
88 private:
// Evaluator that supplies the source packets; held by const reference.
89 const TensorEvaluator& m_impl;
90};
91
// PacketConverter specialization for SrcCoeffRatio == 4 and TgtCoeffRatio == 1:
// four source packets are combined into a single target packet.
92template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
93struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 4, 1> {
// The constructor only stores a reference to the evaluator it is given.
96 : m_impl(impl) {}
97
98 template<int LoadMode, typename Index>
// Number of coefficients held by one source packet; used as the stride between loads.
100 const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
101
// Load four back-to-back source packets starting at `index`, then cast them together.
102 SrcPacket src1 = m_impl.template packet<LoadMode>(index);
103 SrcPacket src2 = m_impl.template packet<LoadMode>(index + SrcPacketSize);
104 SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
105 SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
106 TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4);
107 return result;
108 }
109
110 private:
// Evaluator that supplies the source packets; held by const reference.
111 const TensorEvaluator& m_impl;
112};
113
// PacketConverter specialization for SrcCoeffRatio == 8 and TgtCoeffRatio == 1:
// eight source packets are combined into a single target packet.
114template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket>
115struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 8, 1> {
// The constructor only stores a reference to the evaluator it is given.
118 : m_impl(impl) {}
119
120 template<int LoadMode, typename Index>
// Number of coefficients held by one source packet; used as the stride between loads.
122 const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
123
// Load eight back-to-back source packets starting at `index`, then cast them together.
124 SrcPacket src1 = m_impl.template packet<LoadMode>(index);
125 SrcPacket src2 = m_impl.template packet<LoadMode>(index + 1 * SrcPacketSize);
126 SrcPacket src3 = m_impl.template packet<LoadMode>(index + 2 * SrcPacketSize);
127 SrcPacket src4 = m_impl.template packet<LoadMode>(index + 3 * SrcPacketSize);
128 SrcPacket src5 = m_impl.template packet<LoadMode>(index + 4 * SrcPacketSize);
129 SrcPacket src6 = m_impl.template packet<LoadMode>(index + 5 * SrcPacketSize);
130 SrcPacket src7 = m_impl.template packet<LoadMode>(index + 6 * SrcPacketSize);
131 SrcPacket src8 = m_impl.template packet<LoadMode>(index + 7 * SrcPacketSize);
132 TgtPacket result = internal::pcast<SrcPacket, TgtPacket>(src1, src2, src3, src4, src5, src6, src7, src8);
133 return result;
134 }
135
136 private:
// Evaluator that supplies the source packets; held by const reference.
137 const TensorEvaluator& m_impl;
138};
139
// PacketConverter specialization for SrcCoeffRatio == 1 and any TgtCoeffRatio.
// It either casts one unaligned source packet directly, or falls back to
// converting the coefficients one by one (see the two branches below).
140template <typename TensorEvaluator, typename SrcPacket, typename TgtPacket, int TgtCoeffRatio>
141struct PacketConverter<TensorEvaluator, SrcPacket, TgtPacket, 1, TgtCoeffRatio> {
// Besides storing the evaluator reference, cache the total number of coefficients
// (impl.dimensions().TotalSize()) so the bounds check below does not recompute it.
144 : m_impl(impl), m_maxIndex(impl.dimensions().TotalSize()) {}
145
146 template<int LoadMode, typename Index>
148 const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
149 // Only call m_impl.packet() when we have direct access to the underlying data. This
150 // ensures that we don't compute the subexpression twice. We may however load some
151 // coefficients twice, but in practice this doesn't negatively impact performance.
// The packet path also requires index + SrcPacketSize to stay strictly below m_maxIndex.
152 if (m_impl.data() && (index + SrcPacketSize < m_maxIndex)) {
153 // Force unaligned memory loads since we can't ensure alignment anymore
154 return internal::pcast<SrcPacket, TgtPacket>(m_impl.template packet<Unaligned>(index));
155 } else {
// Scalar fallback: convert TgtPacketSize coefficients individually into an
// EIGEN_ALIGN_MAX-aligned buffer and load that buffer as the target packet.
156 const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
157 typedef typename internal::unpacket_traits<SrcPacket>::type SrcType;
158 typedef typename internal::unpacket_traits<TgtPacket>::type TgtType;
160 EIGEN_ALIGN_MAX typename internal::unpacket_traits<TgtPacket>::type values[TgtPacketSize];
// NOTE(review): the declaration of `converter` is not visible in this listing;
// presumably a SrcType -> TgtType scalar cast functor - confirm against the full header.
162 for (int i = 0; i < TgtPacketSize; ++i) {
163 values[i] = converter(m_impl.coeff(index+i));
164 }
165 TgtPacket rslt = internal::pload<TgtPacket>(values);
166 return rslt;
167 }
168 }
169
170 private:
// Evaluator that supplies the source data; held by const reference.
171 const TensorEvaluator& m_impl;
// Total coefficient count of m_impl, captured at construction.
172 const typename TensorEvaluator::Index m_maxIndex;
173};
174
175template<typename TargetType, typename XprType>
176class TensorConversionOp : public TensorBase<TensorConversionOp<TargetType, XprType>, ReadOnlyAccessors>
177{
178 public:
185
188
191 expression() const { return m_xpr; }
192
193 protected:
194 typename XprType::Nested m_xpr;
195};
196
// Helper that triggers sub-expression evaluation on `impl`. This is the primary
// template; a separate specialization handles SameType == true.
// NOTE(review): SameType presumably mirrors the evaluator's IsSameType flag, as it
// does for the async helper - the synchronous call site is not visible here.
197template <bool SameType, typename Eval, typename EvalPointerType> struct ConversionSubExprEval {
// The destination pointer argument is deliberately unnamed and unused: the
// sub-expression is evaluated with a NULL destination and `true` is always returned.
// NOTE(review): `true` presumably tells the caller it still has to evaluate the
// conversion itself - confirm against the evalSubExprsIfNeeded contract.
198 static EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType) {
199 impl.evalSubExprsIfNeeded(NULL);
200 return true;
201 }
202};
203
// Specialization for SameType == true: the destination pointer is forwarded to the
// wrapped evaluator and its result is returned unchanged.
204template <typename Eval, typename EvalPointerType> struct ConversionSubExprEval<true, Eval, EvalPointerType> {
205 static EIGEN_STRONG_INLINE bool run(Eval& impl, EvalPointerType data) {
206 return impl.evalSubExprsIfNeeded(data);
207 }
208};
209
210#ifdef EIGEN_USE_THREADS
// Asynchronous counterpart of ConversionSubExprEval (only compiled when
// EIGEN_USE_THREADS is defined). This is the primary template; a separate
// specialization handles SameType == true.
211template <bool SameType, typename Eval, typename EvalPointerType,
212 typename EvalSubExprsCallback>
213struct ConversionSubExprEvalAsync {
// The destination pointer argument is unnamed and unused: the sub-expression is
// evaluated with a nullptr destination, and `done` is moved into the async call.
214 static EIGEN_STRONG_INLINE void run(Eval& impl, EvalPointerType, EvalSubExprsCallback done) {
215 impl.evalSubExprsIfNeededAsync(nullptr, std::move(done));
216 }
217};
218
// Specialization for SameType == true: both the destination pointer and the
// completion callback are forwarded to the wrapped evaluator's async evaluation.
219template <typename Eval, typename EvalPointerType,
220 typename EvalSubExprsCallback>
221struct ConversionSubExprEvalAsync<true, Eval, EvalPointerType,
222 EvalSubExprsCallback> {
223 static EIGEN_STRONG_INLINE void run(Eval& impl, EvalPointerType data, EvalSubExprsCallback done) {
224 impl.evalSubExprsIfNeededAsync(data, std::move(done));
225 }
226};
227#endif
228
229namespace internal {
230
231template <typename SrcType, typename TargetType, bool IsSameT>
232struct CoeffConv {
233 template <typename ArgType, typename Device>
238};
239
// CoeffConv specialization for IsSameT == true: the coefficient is read from the
// wrapped evaluator and returned as-is, with no conversion step.
240template <typename SrcType, typename TargetType>
241struct CoeffConv<SrcType, TargetType, true> {
242 template <typename ArgType, typename Device>
244 return impl.coeff(index);
245 }
246};
247
248template <typename SrcPacket, typename TargetPacket, int LoadMode, bool ActuallyVectorize, bool IsSameT>
267
268template <typename SrcPacket, typename TargetPacket, int LoadMode, bool IsSameT>
282
// PacketConv specialization for ActuallyVectorize == false and IsSameT == true:
// the packet is assembled from individual coefficient reads, with no conversion.
283template <typename SrcPacket, typename TargetPacket, int LoadMode>
284struct PacketConv<SrcPacket, TargetPacket, LoadMode, /*ActuallyVectorize=*/false, /*IsSameT=*/true> {
287
288 template <typename ArgType, typename Device>
// Fill `values` with PacketSize consecutive coefficients starting at `index`,
// then load the buffer as a TargetPacket.
// NOTE(review): the declarations of `PacketSize` and `values` are not visible in
// this listing - confirm their types against the full header.
291 for (int i = 0; i < PacketSize; ++i) values[i] = impl.coeff(index+i);
292 return internal::pload<TargetPacket>(values);
293 }
294};
295
// PacketConv specialization for ActuallyVectorize == true and IsSameT == true:
// the wrapped evaluator's packet is returned directly using the given LoadMode.
296template <typename SrcPacket, typename TargetPacket, int LoadMode>
297struct PacketConv<SrcPacket, TargetPacket, LoadMode, /*ActuallyVectorize=*/true, /*IsSameT=*/true> {
298 template <typename ArgType, typename Device>
300 return impl.template packet<LoadMode>(index);
301 }
302};
303
304} // namespace internal
305
306// Eval as rvalue
307template<typename TargetType, typename ArgType, typename Device>
308struct TensorEvaluator<const TensorConversionOp<TargetType, ArgType>, Device>
309{
311 typedef typename XprType::Index Index;
313 typedef TargetType Scalar;
314 typedef TargetType CoeffReturnType;
319 static const bool IsSameType = internal::is_same<TargetType, SrcType>::value;
322
323 enum {
324 IsAligned = false,
326 #ifndef EIGEN_USE_SYCL
327 true,
328 #else
331 #endif
335 RawAccess = false
336 };
337
338 static const int NumDims = internal::array_size<Dimensions>::value;
339
340 //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
343
346
347 struct TensorConversionOpBlockFactory {
348 template <typename ArgXprType>
352
353 template <typename ArgXprType>
354 typename XprType<ArgXprType>::type expr(const ArgXprType& expr) const {
355 return typename XprType<ArgXprType>::type(expr);
356 }
357 };
358
359 typedef internal::TensorUnaryExprBlock<TensorConversionOpBlockFactory,
360 ArgTensorBlock>
362 //===--------------------------------------------------------------------===//
363
364 EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
365 : m_impl(op.expression(), device)
366 {
367 }
368
369 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_impl.dimensions(); }
370
375
376#ifdef EIGEN_USE_THREADS
377 template <typename EvalSubExprsCallback>
378 EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
379 EvaluatorPointerType data, EvalSubExprsCallback done) {
380 ConversionSubExprEvalAsync<IsSameType, TensorEvaluator<ArgType, Device>,
382 EvalSubExprsCallback>::run(m_impl, data, std::move(done));
383 }
384#endif
385
387 {
388 m_impl.cleanup();
389 }
390
395
396 template<int LoadMode>
398 packet(Index index) const {
399 // If we are not going to do the cast, we just need to check that base
400 // TensorEvaluator has packet access. Otherwise we also need to make sure,
401 // that we have an implementation of vectorized cast.
402 const bool Vectorizable =
403 IsSameType
407
409 Vectorizable, IsSameType>::run(m_impl, index);
410 }
411
413 costPerCoeff(bool vectorized) const {
414 const double cast_cost = TensorOpCost::CastCost<SrcType, TargetType>();
415 if (vectorized) {
416 const double SrcCoeffRatio =
418 const double TgtCoeffRatio =
420 return m_impl.costPerCoeff(vectorized) * (SrcCoeffRatio / PacketSize) +
421 TensorOpCost(0, 0, TgtCoeffRatio * (cast_cost / PacketSize));
422 } else {
423 return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, cast_cost);
424 }
425 }
426
429 return m_impl.getResourceRequirements();
430 }
431
434 bool /*root_of_expr_ast*/ = false) const {
435 return TensorBlock(m_impl.block(desc, scratch),
436 TensorConversionOpBlockFactory());
437 }
438
440
442 const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
443#ifdef EIGEN_USE_SYCL
444 // binding placeholder accessors to a command group handler for SYCL
445 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
446 m_impl.bind(cgh);
447 }
448#endif
449
450 protected:
452};
453
454} // end namespace Eigen
455
456#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVERSION_H
int i
Definition BiCGSTAB_step_by_step.cpp:9
#define EIGEN_ALIGN_MAX
Definition ConfigureVectorization.h:157
#define EIGEN_UNROLL_LOOP
Definition Macros.h:1461
#define EIGEN_DEVICE_FUNC
Definition Macros.h:976
#define EIGEN_STRONG_INLINE
Definition Macros.h:917
int data[]
Definition Map_placement_new.cpp:1
Generic expression where a coefficient-wise binary operator is applied to two expressions.
Definition CwiseBinaryOp.h:84
The tensor base class.
Definition TensorBase.h:973
Tensor conversion class. This class makes it possible to vectorize type casting operations when the number of scalars per packet in the source and the destination type differ.
Definition TensorConversion.h:177
internal::traits< TensorConversionOp >::StorageKind StorageKind
Definition TensorConversion.h:180
NumTraits< Scalar >::Real RealScalar
Definition TensorConversion.h:184
Scalar CoeffReturnType
Definition TensorConversion.h:183
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConversionOp(const XprType &xpr)
Definition TensorConversion.h:186
EIGEN_DEVICE_FUNC const internal::remove_all< typenameXprType::Nested >::type & expression() const
Definition TensorConversion.h:191
XprType::Nested m_xpr
Definition TensorConversion.h:194
internal::traits< TensorConversionOp >::Index Index
Definition TensorConversion.h:181
internal::traits< TensorConversionOp >::Scalar Scalar
Definition TensorConversion.h:179
internal::nested< TensorConversionOp >::type Nested
Definition TensorConversion.h:182
Definition TensorCostModel.h:25
Definition TensorBlock.h:912
return int(ret)+1
Namespace containing all symbols from the Eigen library.
Definition bench_norm.cpp:85
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition Meta.h:74
Definition BandTriangularSolver.h:13
static EIGEN_STRONG_INLINE bool run(Eval &impl, EvalPointerType data)
Definition TensorConversion.h:205
Definition TensorConversion.h:197
static EIGEN_STRONG_INLINE bool run(Eval &impl, EvalPointerType)
Definition TensorConversion.h:198
Definition Constants.h:507
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator &impl)
Definition TensorConversion.h:59
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const
Definition TensorConversion.h:63
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator &impl)
Definition TensorConversion.h:143
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const
Definition TensorConversion.h:147
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator &impl)
Definition TensorConversion.h:75
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const
Definition TensorConversion.h:79
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const
Definition TensorConversion.h:99
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator &impl)
Definition TensorConversion.h:95
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketConverter(const TensorEvaluator &impl)
Definition TensorConversion.h:117
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket packet(Index index) const
Definition TensorConversion.h:121
Definition TensorConversion.h:54
Definition TensorMeta.h:50
Definition TensorForwardDeclarations.h:37
TensorEvaluator< ArgType, Device >::Dimensions Dimensions
Definition TensorConversion.h:312
TensorEvaluator< constArgType, Device >::TensorBlock ArgTensorBlock
Definition TensorConversion.h:345
StorageMemory< CoeffReturnType, Device > Storage
Definition TensorConversion.h:320
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const
Definition TensorConversion.h:413
PacketType< SrcType, Device >::type PacketSourceType
Definition TensorConversion.h:317
internal::remove_all< typenameinternal::traits< ArgType >::Scalar >::type SrcType
Definition TensorConversion.h:315
EIGEN_STRONG_INLINE void cleanup()
Definition TensorConversion.h:386
internal::TensorBlockDescriptor< NumDims, Index > TensorBlockDesc
Definition TensorConversion.h:341
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
Definition TensorConversion.h:391
internal::TensorBlockScratchAllocator< Device > TensorBlockScratch
Definition TensorConversion.h:342
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data)
Definition TensorConversion.h:371
EIGEN_DEVICE_FUNC EvaluatorPointerType data() const
Definition TensorConversion.h:439
TensorEvaluator< ArgType, Device > m_impl
Definition TensorConversion.h:451
TensorConversionOp< TargetType, ArgType > XprType
Definition TensorConversion.h:310
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition TensorConversion.h:369
internal::TensorUnaryExprBlock< TensorConversionOpBlockFactory, ArgTensorBlock > TensorBlock
Definition TensorConversion.h:361
const TensorEvaluator< ArgType, Device > & impl() const
required by sycl in order to extract the sycl accessor
Definition TensorConversion.h:442
EIGEN_STRONG_INLINE TensorEvaluator(const XprType &op, const Device &device)
Definition TensorConversion.h:364
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements getResourceRequirements() const
Definition TensorConversion.h:428
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
Definition TensorConversion.h:398
PacketType< CoeffReturnType, Device >::type PacketReturnType
Definition TensorConversion.h:316
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock block(TensorBlockDesc &desc, TensorBlockScratch &scratch, bool=false) const
Definition TensorConversion.h:433
TensorConversionOp< TargetType, const ArgXprType > type
Definition TensorConversion.h:350
XprType< ArgXprType >::type expr(const ArgXprType &expr) const
Definition TensorConversion.h:354
A cost model used to limit the number of threads used for evaluating tensor expression.
Definition TensorEvaluator.h:29
Storage::Type EvaluatorPointerType
Definition TensorEvaluator.h:39
PacketType< CoeffReturnType, Device >::type PacketReturnType
Definition TensorEvaluator.h:33
EIGEN_DEVICE_FUNC EvaluatorPointerType data() const
Definition TensorEvaluator.h:181
@ BlockAccess
Definition TensorEvaluator.h:48
@ PreferBlockAccess
Definition TensorEvaluator.h:49
@ PacketAccess
Definition TensorEvaluator.h:47
@ Layout
Definition TensorEvaluator.h:50
@ IsAligned
Definition TensorEvaluator.h:46
Derived::Index Index
Definition TensorEvaluator.h:30
internal::TensorMaterializedBlock< ScalarNoConst, NumCoords, Layout, Index > TensorBlock
Definition TensorEvaluator.h:63
Derived::Dimensions Dimensions
Definition TensorEvaluator.h:34
static const int PacketSize
Definition TensorEvaluator.h:36
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetType run(const TensorEvaluator< ArgType, Device > &impl, Index index)
Definition TensorConversion.h:243
Definition TensorConversion.h:232
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetType run(const TensorEvaluator< ArgType, Device > &impl, Index index)
Definition TensorConversion.h:234
internal::unpacket_traits< TargetPacket >::type TargetType
Definition TensorConversion.h:285
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator< ArgType, Device > &impl, Index index)
Definition TensorConversion.h:289
internal::unpacket_traits< TargetPacket >::type TargetType
Definition TensorConversion.h:271
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator< ArgType, Device > &impl, Index index)
Definition TensorConversion.h:274
internal::unpacket_traits< SrcPacket >::type SrcType
Definition TensorConversion.h:270
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator< ArgType, Device > &impl, Index index)
Definition TensorConversion.h:299
Definition TensorConversion.h:249
static const int PacketSize
Definition TensorConversion.h:253
internal::unpacket_traits< TargetPacket >::type TargetType
Definition TensorConversion.h:251
internal::unpacket_traits< SrcPacket >::type SrcType
Definition TensorConversion.h:250
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TargetPacket run(const TensorEvaluator< ArgType, Device > &impl, Index index)
Definition TensorConversion.h:256
Definition Meta.h:445
const TensorConversionOp< TargetType, XprType > & type
Definition TensorConversion.h:41
Definition XprHelper.h:332
Definition Meta.h:148
Definition TensorTraits.h:175
traits< XprType >::Index Index
Definition TensorConversion.h:29
XprType::Nested Nested
Definition TensorConversion.h:30
remove_reference< Nested >::type _Nested
Definition TensorConversion.h:31
TypeConversion< Scalar, typenametraits< XprType >::PointerType >::type PointerType
Definition TensorConversion.h:35
traits< XprType >::StorageKind StorageKind
Definition TensorConversion.h:28
Definition ForwardDeclarations.h:17
Definition GenericPacketMath.h:148
Definition GenericPacketMath.h:133