TensorScan.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Igor Babuschkin <igor@babuschk.in>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_SCAN_H
#define EIGEN_CXX11_TENSOR_TENSOR_SCAN_H

namespace Eigen {

namespace internal {

template <typename Op, typename XprType>
struct traits<TensorScanOp<Op, XprType> >
    : public traits<XprType> {
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename Op, typename XprType>
struct eval<TensorScanOp<Op, XprType>, Eigen::Dense>
{
  typedef const TensorScanOp<Op, XprType>& type;
};

template<typename Op, typename XprType>
struct nested<TensorScanOp<Op, XprType>, 1,
              typename eval<TensorScanOp<Op, XprType> >::type>
{
  typedef TensorScanOp<Op, XprType> type;
};
} // end namespace internal

/** \class TensorScan
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor scan class.
  */
template <typename Op, typename XprType>
class TensorScanOp
    : public TensorBase<TensorScanOp<Op, XprType>, ReadOnlyAccessors> {
public:
  typedef typename Eigen::internal::traits<TensorScanOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorScanOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorScanOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorScanOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorScanOp(
      const XprType& expr, const Index& axis, bool exclusive = false, const Op& op = Op())
      : m_expr(expr), m_axis(axis), m_accumulator(op), m_exclusive(exclusive) {}

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const Index axis() const { return m_axis; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const XprType& expression() const { return m_expr; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const Op accumulator() const { return m_accumulator; }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  bool exclusive() const { return m_exclusive; }

protected:
  typename XprType::Nested m_expr;
  const Index m_axis;
  const Op m_accumulator;
  const bool m_exclusive;
};

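// Illustrative usage sketch (annotation, not part of the original header):
// TensorScanOp is normally created through the cumulative-scan helpers
// declared in TensorBase.h rather than constructed directly. Assuming the
// usual Tensor API from <unsupported/Eigen/CXX11/Tensor>:
//
//   Eigen::Tensor<float, 2> t(3, 4);
//   t.setRandom();
//   // Inclusive running sum along axis 1 (each entry includes itself).
//   Eigen::Tensor<float, 2> csum = t.cumsum(1);
//   // Exclusive running product along axis 0 (each entry excludes itself,
//   // starting from the reducer's identity, 1 for products).
//   Eigen::Tensor<float, 2> cprod = t.cumprod(0, /*exclusive=*/true);
//
// Both helpers return a TensorScanOp with internal::SumReducer /
// internal::ProdReducer supplied as the Op template argument.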

namespace internal {

template <typename Self>
EIGEN_STRONG_INLINE void ReduceScalar(Self& self, Index offset,
                                      typename Self::CoeffReturnType* data) {
  // Compute the scan along the axis, starting at the given offset
  typename Self::CoeffReturnType accum = self.accumulator().initialize();
  if (self.stride() == 1) {
    if (self.exclusive()) {
      for (Index curr = offset; curr < offset + self.size(); ++curr) {
        data[curr] = self.accumulator().finalize(accum);
        self.accumulator().reduce(self.inner().coeff(curr), &accum);
      }
    } else {
      for (Index curr = offset; curr < offset + self.size(); ++curr) {
        self.accumulator().reduce(self.inner().coeff(curr), &accum);
        data[curr] = self.accumulator().finalize(accum);
      }
    }
  } else {
    if (self.exclusive()) {
      for (Index idx3 = 0; idx3 < self.size(); idx3++) {
        Index curr = offset + idx3 * self.stride();
        data[curr] = self.accumulator().finalize(accum);
        self.accumulator().reduce(self.inner().coeff(curr), &accum);
      }
    } else {
      for (Index idx3 = 0; idx3 < self.size(); idx3++) {
        Index curr = offset + idx3 * self.stride();
        self.accumulator().reduce(self.inner().coeff(curr), &accum);
        data[curr] = self.accumulator().finalize(accum);
      }
    }
  }
}
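// Worked example (annotation, not in the original source): with a SumReducer,
// self.size() == 4, stride 1 and input values {1, 2, 3, 4} starting at
// `offset`, the loops above produce
//   inclusive scan: data = {1, 3, 6, 10}   (reduce first, then store)
//   exclusive scan: data = {0, 1, 3, 6}    (store the running total, then reduce)
// i.e. the exclusive variant writes the accumulator state *before* element i
// is folded in, starting from the reducer's initialize() value (0 for sums).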

template <typename Self>
EIGEN_STRONG_INLINE void ReducePacket(Self& self, Index offset,
                                      typename Self::CoeffReturnType* data) {
  using Scalar = typename Self::CoeffReturnType;
  using Packet = typename Self::PacketReturnType;
  // Compute the scan along the axis, starting at the calculated offset
  Packet accum = self.accumulator().template initializePacket<Packet>();
  if (self.stride() == 1) {
    if (self.exclusive()) {
      for (Index curr = offset; curr < offset + self.size(); ++curr) {
        internal::pstoreu<Scalar, Packet>(data + curr, self.accumulator().finalizePacket(accum));
        self.accumulator().reducePacket(self.inner().template packet<Unaligned>(curr), &accum);
      }
    } else {
      for (Index curr = offset; curr < offset + self.size(); ++curr) {
        self.accumulator().reducePacket(self.inner().template packet<Unaligned>(curr), &accum);
        internal::pstoreu<Scalar, Packet>(data + curr, self.accumulator().finalizePacket(accum));
      }
    }
  } else {
    if (self.exclusive()) {
      for (Index idx3 = 0; idx3 < self.size(); idx3++) {
        const Index curr = offset + idx3 * self.stride();
        internal::pstoreu<Scalar, Packet>(data + curr, self.accumulator().finalizePacket(accum));
        self.accumulator().reducePacket(self.inner().template packet<Unaligned>(curr), &accum);
      }
    } else {
      for (Index idx3 = 0; idx3 < self.size(); idx3++) {
        const Index curr = offset + idx3 * self.stride();
        self.accumulator().reducePacket(self.inner().template packet<Unaligned>(curr), &accum);
        internal::pstoreu<Scalar, Packet>(data + curr, self.accumulator().finalizePacket(accum));
      }
    }
  }
}

// Default (scalar, sequential) computation of a block of scan lines.
template <typename Self, bool Vectorize, bool Parallel>
struct ReduceBlock {
  EIGEN_STRONG_INLINE void operator()(Self& self, Index idx1,
                                      typename Self::CoeffReturnType* data) {
    for (Index idx2 = 0; idx2 < self.stride(); idx2++) {
      // Calculate the starting offset for the scan
      Index offset = idx1 + idx2;
      ReduceScalar(self, offset, data);
    }
  }
};

// Specialization for vectorized reduction.
template <typename Self>
struct ReduceBlock<Self, /*Vectorize=*/true, /*Parallel=*/false> {
  EIGEN_STRONG_INLINE void operator()(Self& self, Index idx1,
                                      typename Self::CoeffReturnType* data) {
    using Packet = typename Self::PacketReturnType;
    const int PacketSize = internal::unpacket_traits<Packet>::size;
    Index idx2 = 0;
    for (; idx2 + PacketSize <= self.stride(); idx2 += PacketSize) {
      // Calculate the starting offset for the packet scan
      Index offset = idx1 + idx2;
      ReducePacket(self, offset, data);
    }
    for (; idx2 < self.stride(); idx2++) {
      // Calculate the starting offset for the scan
      Index offset = idx1 + idx2;
      ReduceScalar(self, offset, data);
    }
  }
};

// Single-threaded CPU implementation of scan
template <typename Self, typename Reducer, typename Device,
          bool Vectorize =
              (TensorEvaluator<typename Self::ChildTypeNoConst, Device>::PacketAccess &&
               internal::reducer_traits<Reducer, Device>::PacketAccess)>
struct ScanLauncher {
  void operator()(Self& self, typename Self::CoeffReturnType* data) {
    Index total_size = internal::array_prod(self.dimensions());

    // We fix the index along the scan axis to 0 and perform a
    // scan per remaining entry. The iteration is split into two nested
    // loops to avoid an integer division by keeping track of each idx1 and
    // idx2 (a worked index example follows this struct).
    for (Index idx1 = 0; idx1 < total_size; idx1 += self.stride() * self.size()) {
      ReduceBlock<Self, Vectorize, /*Parallel=*/false> block_reducer;
      block_reducer(self, idx1, data);
    }
  }
};
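// Index bookkeeping sketch (annotation, not in the original source): for a
// ColMajor tensor with dimensions (2, 3, 4) scanned along axis 1 we have
// stride() == 2 and size() == 3, so each scan "panel" spans stride * size = 6
// coefficients.
//   idx1 walks over panels:             0, 6, 12, 18
//   idx2 walks over entries in a panel: 0, 1   (one per stride position)
//   offset = idx1 + idx2 is the first coefficient of each scan line, and
//   ReduceScalar/ReducePacket then step through it as offset + k * stride().
// Tracking idx1 and idx2 separately is what lets this code avoid the per-line
// integer division/modulo that the GPU kernel below performs per thread.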

#ifdef EIGEN_USE_THREADS

// Adjust block_size to avoid false sharing of cachelines among
// threads. Currently set to twice the cache line size on Intel and ARM
// processors.
EIGEN_STRONG_INLINE Index AdjustBlockSize(Index item_size, Index block_size) {
  EIGEN_CONSTEXPR Index kBlockAlignment = 128;
  const Index items_per_cacheline =
      numext::maxi<Index>(1, kBlockAlignment / item_size);
  return items_per_cacheline * divup(block_size, items_per_cacheline);
}
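// Numeric example (annotation, not in the original source): for 8-lane float
// packets, item_size = 8 * sizeof(float) = 32, so items_per_cacheline =
// 128 / 32 = 4. A requested block_size of 10 is rounded up to
// 4 * divup(10, 4) = 12 items, so shard boundaries land on 128-byte
// boundaries of `data` and two neighbouring threads never share a cacheline.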

template <typename Self>
struct ReduceBlock<Self, /*Vectorize=*/true, /*Parallel=*/true> {
  EIGEN_STRONG_INLINE void operator()(Self& self, Index idx1,
                                      typename Self::CoeffReturnType* data) {
    using Scalar = typename Self::CoeffReturnType;
    using Packet = typename Self::PacketReturnType;
    const int PacketSize = internal::unpacket_traits<Packet>::size;
    Index num_scalars = self.stride();
    Index num_packets = 0;
    if (self.stride() >= PacketSize) {
      num_packets = self.stride() / PacketSize;
      self.device().parallelFor(
          num_packets,
          TensorOpCost(PacketSize * self.size(), PacketSize * self.size(),
                       16 * PacketSize * self.size(), true, PacketSize),
          // Make the shard size large enough that two neighboring threads
          // won't write to the same cacheline of `data`.
          [=](Index blk_size) {
            return AdjustBlockSize(PacketSize * sizeof(Scalar), blk_size);
          },
          [&](Index first, Index last) {
            for (Index packet = first; packet < last; ++packet) {
              const Index idx2 = packet * PacketSize;
              ReducePacket(self, idx1 + idx2, data);
            }
          });
      num_scalars -= num_packets * PacketSize;
    }
    self.device().parallelFor(
        num_scalars, TensorOpCost(self.size(), self.size(), 16 * self.size()),
        // Make the shard size large enough that two neighboring threads
        // won't write to the same cacheline of `data`.
        [=](Index blk_size) {
          return AdjustBlockSize(sizeof(Scalar), blk_size);
        },
        [&](Index first, Index last) {
          for (Index scalar = first; scalar < last; ++scalar) {
            const Index idx2 = num_packets * PacketSize + scalar;
            ReduceScalar(self, idx1 + idx2, data);
          }
        });
  }
};

template <typename Self>
struct ReduceBlock<Self, /*Vectorize=*/false, /*Parallel=*/true> {
  EIGEN_STRONG_INLINE void operator()(Self& self, Index idx1,
                                      typename Self::CoeffReturnType* data) {
    using Scalar = typename Self::CoeffReturnType;
    self.device().parallelFor(
        self.stride(), TensorOpCost(self.size(), self.size(), 16 * self.size()),
        // Make the shard size large enough that two neighboring threads
        // won't write to the same cacheline of `data`.
        [=](Index blk_size) {
          return AdjustBlockSize(sizeof(Scalar), blk_size);
        },
        [&](Index first, Index last) {
          for (Index idx2 = first; idx2 < last; ++idx2) {
            ReduceScalar(self, idx1 + idx2, data);
          }
        });
  }
};

// Specialization for multi-threaded execution.
template <typename Self, typename Reducer, bool Vectorize>
struct ScanLauncher<Self, Reducer, ThreadPoolDevice, Vectorize> {
  void operator()(Self& self, typename Self::CoeffReturnType* data) {
    using Scalar = typename Self::CoeffReturnType;
    using Packet = typename Self::PacketReturnType;
    const int PacketSize = internal::unpacket_traits<Packet>::size;
    const Index total_size = internal::array_prod(self.dimensions());
    const Index inner_block_size = self.stride() * self.size();
    bool parallelize_by_outer_blocks = (total_size >= (self.stride() * inner_block_size));

    if ((parallelize_by_outer_blocks && total_size <= 4096) ||
        (!parallelize_by_outer_blocks && self.stride() < PacketSize)) {
      ScanLauncher<Self, Reducer, DefaultDevice, Vectorize> launcher;
      launcher(self, data);
      return;
    }

    if (parallelize_by_outer_blocks) {
      // Parallelize over outer blocks.
      const Index num_outer_blocks = total_size / inner_block_size;
      self.device().parallelFor(
          num_outer_blocks,
          TensorOpCost(inner_block_size, inner_block_size,
                       16 * PacketSize * inner_block_size, Vectorize,
                       PacketSize),
          [=](Index blk_size) {
            return AdjustBlockSize(inner_block_size * sizeof(Scalar), blk_size);
          },
          [&](Index first, Index last) {
            for (Index idx1 = first; idx1 < last; ++idx1) {
              ReduceBlock<Self, Vectorize, /*Parallel=*/false> block_reducer;
              block_reducer(self, idx1 * inner_block_size, data);
            }
          });
    } else {
      // Parallelize over inner packets/scalars dimensions when the reduction
      // axis is not an inner dimension.
      ReduceBlock<Self, Vectorize, /*Parallel=*/true> block_reducer;
      for (Index idx1 = 0; idx1 < total_size;
           idx1 += self.stride() * self.size()) {
        block_reducer(self, idx1, data);
      }
    }
  }
};
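// Heuristic sketch (annotation, not in the original source): consider a
// ColMajor tensor of dimensions (32, 4096), total_size == 131072.
//  * Scanning along axis 0: stride() == 1, size() == 32, inner_block_size ==
//    32, so total_size >= stride * inner_block_size and the 4096 independent
//    outer blocks are sharded across the thread pool.
//  * Scanning along axis 1: stride() == 32, size() == 4096, inner_block_size
//    == total_size, so the outer-block test fails and (since stride >=
//    PacketSize) the 32 scan lines inside the single block are parallelized
//    via ReduceBlock<..., /*Parallel=*/true> instead.
// Very small problems (total_size <= 4096) or narrow strides fall back to the
// single-threaded DefaultDevice launcher above.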
#endif // EIGEN_USE_THREADS

#if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC))

// GPU implementation of scan
// TODO(ibab) This placeholder implementation performs multiple scans in
// parallel, but it would be better to use a parallel scan algorithm and
// optimize memory access.
template <typename Self, typename Reducer>
__global__ EIGEN_HIP_LAUNCH_BOUNDS_1024 void ScanKernel(Self self, Index total_size, typename Self::CoeffReturnType* data) {
  // Compute offset as in the CPU version
  Index val = threadIdx.x + blockIdx.x * blockDim.x;
  Index offset = (val / self.stride()) * self.stride() * self.size() + val % self.stride();

  if (offset + (self.size() - 1) * self.stride() < total_size) {
    // Compute the scan along the axis, starting at the calculated offset
    typename Self::CoeffReturnType accum = self.accumulator().initialize();
    for (Index idx = 0; idx < self.size(); idx++) {
      Index curr = offset + idx * self.stride();
      if (self.exclusive()) {
        data[curr] = self.accumulator().finalize(accum);
        self.accumulator().reduce(self.inner().coeff(curr), &accum);
      } else {
        self.accumulator().reduce(self.inner().coeff(curr), &accum);
        data[curr] = self.accumulator().finalize(accum);
      }
    }
  }
  __syncthreads();
}
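// Offset sketch (annotation, not in the original source): each GPU thread owns
// one scan line. For stride() == 2 and size() == 3 (six coefficients per outer
// block), thread val == 5 computes
//   offset = (5 / 2) * 2 * 3 + 5 % 2 = 12 + 1 = 13,
// i.e. the second scan line of the third outer block, and then walks
// offset, offset + 2, offset + 4. The bounds check above discards threads
// whose last element offset + (size() - 1) * stride() would fall outside
// total_size.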

template <typename Self, typename Reducer, bool Vectorize>
struct ScanLauncher<Self, Reducer, GpuDevice, Vectorize> {
  void operator()(const Self& self, typename Self::CoeffReturnType* data) {
    Index total_size = internal::array_prod(self.dimensions());
    Index num_blocks = (total_size / self.size() + 63) / 64;
    Index block_size = 64;

    LAUNCH_GPU_KERNEL((ScanKernel<Self, Reducer>), num_blocks, block_size, 0, self.device(), self, total_size, data);
  }
};
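// Launch-geometry note (annotation, not in the original source): there is one
// scan line per thread, i.e. total_size / self.size() threads in total, packed
// into blocks of 64 threads, so num_blocks = divup(total_size / size, 64).
// For a (32, 4096) float tensor scanned along axis 1 that is 32 scan lines and
// a single block of 64 threads; the surplus threads fail the bounds check in
// the kernel and do no work.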
#endif // EIGEN_USE_GPU && (EIGEN_GPUCC)

} // namespace internal

// Eval as rvalue
template <typename Op, typename ArgType, typename Device>
struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {

  typedef TensorScanOp<Op, ArgType> XprType;
  typedef typename XprType::Index Index;
  typedef const ArgType ChildTypeNoConst;
  typedef const ArgType ChildType;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> Self;
  typedef StorageMemory<Scalar, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned = false,
    PacketAccess = (PacketType<CoeffReturnType, Device>::size > 1),
    BlockAccess = false,
    PreferBlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,
    RawAccess = true
  };

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockNotImplemented TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device),
        m_device(device),
        m_exclusive(op.exclusive()),
        m_accumulator(op.accumulator()),
        m_size(m_impl.dimensions()[op.axis()]),
        m_stride(1), m_consume_dim(op.axis()),
        m_output(NULL) {

    // Accumulating a scalar isn't supported.
    EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(op.axis() >= 0 && op.axis() < NumDims);

    // Compute stride of scan axis (a worked example follows the constructor).
    const Dimensions& dims = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = 0; i < op.axis(); ++i) {
        m_stride = m_stride * dims[i];
      }
    } else {
      // dims can only be indexed through unsigned integers,
      // so let's use an unsigned type to let the compiler know.
      // This prevents spurious warnings such as:
      // "'*((void*)(& evaluator)+64)[18446744073709551615]' may be used uninitialized in this function"
      unsigned int axis = internal::convert_index<unsigned int>(op.axis());
      for (unsigned int i = NumDims - 1; i > axis; --i) {
        m_stride = m_stride * dims[i];
      }
    }
  }
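  // Stride example (annotation, not in the original source): for dimensions
  // (2, 3, 4) and op.axis() == 1 the loop above yields
  //   ColMajor: m_stride = dims[0] = 2   (axis-1 neighbours are 2 apart)
  //   RowMajor: m_stride = dims[2] = 4   (axis-1 neighbours are 4 apart)
  // while m_size = dims[1] = 3 is the number of coefficients folded per scan
  // line. These two values drive all of the launchers above.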

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
    return m_impl.dimensions();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& stride() const {
    return m_stride;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& consume_dim() const {
    return m_consume_dim;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Index& size() const {
    return m_size;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Op& accumulator() const {
    return m_accumulator;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool exclusive() const {
    return m_exclusive;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorEvaluator<ArgType, Device>& inner() const {
    return m_impl;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Device& device() const {
    return m_device;
  }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
    m_impl.evalSubExprsIfNeeded(NULL);
    internal::ScanLauncher<Self, Op, Device> launcher;
    if (data) {
      launcher(*this, data);
      return false;
    }

    const Index total_size = internal::array_prod(dimensions());
    m_output = static_cast<EvaluatorPointerType>(m_device.get((Scalar*) m_device.allocate_temp(total_size * sizeof(Scalar))));
    launcher(*this, m_output);
    return true;
  }
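  // Buffer-ownership sketch (annotation, not in the original source): when the
  // scan is the top-level expression of an assignment, the executor supplies
  // the destination buffer in `data`; the launcher writes into it directly and
  // the `false` return value tells the caller the result already lives there.
  // When the scan is only a sub-expression, e.g.
  //   Eigen::Tensor<float, 0> s = t.cumsum(0).sum();
  // `data` is NULL, so a temporary of total_size coefficients is allocated,
  // the whole scan is materialized into it, coeff()/packet() below read from
  // it, and cleanup() releases it afterwards.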

  template<int LoadMode>
  EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const {
    return internal::ploadt<PacketReturnType, LoadMode>(m_output + index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const
  {
    return m_output;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_output[index];
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0);
  }

  EIGEN_STRONG_INLINE void cleanup() {
    if (m_output) {
      m_device.deallocate_temp(m_output);
      m_output = NULL;
    }
    m_impl.cleanup();
  }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
    m_output.bind(cgh);
  }
#endif

protected:
  TensorEvaluator<ArgType, Device> m_impl;
  const Device EIGEN_DEVICE_REF m_device;
  const bool m_exclusive;
  Op m_accumulator;
  const Index m_size;
  Index m_stride;
  const Index m_consume_dim;
  EvaluatorPointerType m_output;
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_SCAN_H