TensorBroadcasting.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H
#define EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H

namespace Eigen {

/** \class TensorBroadcasting
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor broadcasting class.
  *
  */
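// Illustrative usage sketch (added commentary, not part of the upstream
// header): this op is normally constructed via TensorBase::broadcast(),
// which replicates the tensor along each dimension by the given factor:
//
//   Eigen::Tensor<float, 2> t(2, 3);
//   t.setRandom();
//   Eigen::array<Eigen::Index, 2> bcast{{4, 5}};
//   Eigen::Tensor<float, 2> r = t.broadcast(bcast);  // r has shape [8, 15]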
namespace internal {
template<typename Broadcast, typename XprType>
struct traits<TensorBroadcastingOp<Broadcast, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename Broadcast, typename XprType>
struct eval<TensorBroadcastingOp<Broadcast, XprType>, Eigen::Dense>
{
  typedef const TensorBroadcastingOp<Broadcast, XprType> EIGEN_DEVICE_REF type;
};

template<typename Broadcast, typename XprType>
struct nested<TensorBroadcastingOp<Broadcast, XprType>, 1, typename eval<TensorBroadcastingOp<Broadcast, XprType> >::type>
{
  typedef TensorBroadcastingOp<Broadcast, XprType> type;
};

template <typename Dims>
struct is_input_scalar {
  static const bool value = false;
};
template <>
struct is_input_scalar<Sizes<> > {
  static const bool value = true;
};
#ifndef EIGEN_EMULATE_CXX11_META_H
template <typename std::ptrdiff_t... Indices>
struct is_input_scalar<Sizes<Indices...> > {
  static const bool value = (Sizes<Indices...>::total_size == 1);
};
#endif

}  // end namespace internal


template<typename Broadcast, typename XprType>
class TensorBroadcastingOp : public TensorBase<TensorBroadcastingOp<Broadcast, XprType>, ReadOnlyAccessors>
{
  public:
    typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorBroadcastingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorBroadcastingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBroadcastingOp(const XprType& expr, const Broadcast& broadcast)
        : m_xpr(expr), m_broadcast(broadcast) {}

    EIGEN_DEVICE_FUNC
    const Broadcast& broadcast() const { return m_broadcast; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
    const Broadcast m_broadcast;
};


// Eval as rvalue
template<typename Broadcast, typename ArgType, typename Device>
struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
{
  typedef TensorBroadcastingOp<Broadcast, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  protected:  // all the non-static fields must have the same access control, otherwise the TensorEvaluator won't be standard layout;
  bool isCopy, nByOne, oneByN;
  public:
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned         = true,
    PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess       = TensorEvaluator<ArgType, Device>::BlockAccess,
    PreferBlockAccess = true,
    Layout            = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess         = false
  };

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  // We do block based broadcasting using a trick with 2x tensor rank and 0
  // strides. See block method implementation for details.
  typedef DSizes<Index, 2 * NumDims> BroadcastDimensions;
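  // Illustrative sketch (added commentary; values are hypothetical): for a
  // ColMajor input of shape [2, 3] broadcast by factors [4, 5], the
  // doubled-rank descriptor built in blockBroadcastingParams() would be
  //   bcast_block_sizes   = [2, 4, 3, 5]
  //   bcast_input_strides = [1, 0, 2, 0]
  // i.e. each input dimension is followed by an extra broadcast dimension
  // whose input stride is 0, so the same coefficients are re-read rather
  // than copied.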
  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock
      ArgTensorBlock;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : isCopy(false), nByOne(false), oneByN(false),
        m_device(device), m_broadcast(op.broadcast()), m_impl(op.expression(), device)
  {

    // The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar
    // and store the result in a scalar. Instead one should reshape the scalar into an N-D
    // tensor with N >= 1 of 1 element first and then broadcast.
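    // Added sketch of the reshape-then-broadcast workaround described above:
    //   Eigen::Tensor<float, 0> s;  // rank-0 input cannot be broadcast
    //   auto v = s.reshape(Eigen::array<Eigen::Index, 1>{{1}})
    //             .broadcast(Eigen::array<Eigen::Index, 1>{{10}});  // rank-1, 10 coeffs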
    EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
    const InputDimensions& input_dims = m_impl.dimensions();
    isCopy = true;
    for (int i = 0; i < NumDims; ++i) {
      eigen_assert(input_dims[i] > 0);
      m_dimensions[i] = input_dims[i] * m_broadcast[i];
      if (m_broadcast[i] != 1) {
        isCopy = false;
      }
    }

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
      }
    } else {
      m_inputStrides[NumDims-1] = 1;
      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims-2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
        m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
      }
    }

    if (input_dims[0] == 1) {
      oneByN = true;
      for (int i = 1; i < NumDims; ++i) {
        if (m_broadcast[i] != 1) {
          oneByN = false;
          break;
        }
      }
    } else if (input_dims[NumDims-1] == 1) {
      nByOne = true;
      for (int i = 0; i < NumDims-1; ++i) {
        if (m_broadcast[i] != 1) {
          nByOne = false;
          break;
        }
      }
    }

    // Handle special formats like NCHW, whose input shape is '[1, N..., 1]' and
    // broadcast shape is '[N, 1..., N]'.
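    // Added illustration: input shape [1, 5, 1] with broadcast [3, 1, 4]
    // yields output shape [3, 5, 4]; this branch sets both oneByN and nByOne.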
    if (!oneByN && !nByOne) {
      if (input_dims[0] == 1 && input_dims[NumDims-1] == 1 && NumDims > 2) {
        nByOne = true;
        oneByN = true;
        for (int i = 1; i < NumDims-1; ++i) {
          if (m_broadcast[i] != 1) {
            nByOne = false;
            oneByN = false;
            break;
          }
        }
      }
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const
  {
    if (internal::is_input_scalar<typename internal::remove_all<InputDimensions>::type>::value) {
      return m_impl.coeff(0);
    }

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      if (isCopy) {
        return m_impl.coeff(index);
      } else {
        return coeffColMajor(index);
      }
    } else {
      if (isCopy) {
        return m_impl.coeff(index);
      } else {
        return coeffRowMajor(index);
      }
    }
  }

  // TODO: attempt to speed this up. The integer divisions and modulo are slow
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index indexColMajor(Index index) const {
    Index inputIndex = 0;
    EIGEN_UNROLL_LOOP
    for (int i = NumDims - 1; i > 0; --i) {
      const Index idx = index / m_outputStrides[i];
      if (internal::index_statically_eq<Broadcast>(i, 1)) {
        eigen_assert(idx < m_impl.dimensions()[i]);
        inputIndex += idx * m_inputStrides[i];
      } else {
        if (internal::index_statically_eq<InputDimensions>(i, 1)) {
          eigen_assert(idx % m_impl.dimensions()[i] == 0);
        } else {
          inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
        }
      }
      index -= idx * m_outputStrides[i];
    }
    if (internal::index_statically_eq<Broadcast>(0, 1)) {
      eigen_assert(index < m_impl.dimensions()[0]);
      inputIndex += index;
    } else {
      if (internal::index_statically_eq<InputDimensions>(0, 1)) {
        eigen_assert(index % m_impl.dimensions()[0] == 0);
      } else {
        inputIndex += (index % m_impl.dimensions()[0]);
      }
    }
    return inputIndex;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffColMajor(Index index) const
  {
    return m_impl.coeff(indexColMajor(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index indexRowMajor(Index index) const {
    Index inputIndex = 0;
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < NumDims - 1; ++i) {
      const Index idx = index / m_outputStrides[i];
      if (internal::index_statically_eq<Broadcast>(i, 1)) {
        eigen_assert(idx < m_impl.dimensions()[i]);
        inputIndex += idx * m_inputStrides[i];
      } else {
        if (internal::index_statically_eq<InputDimensions>(i, 1)) {
          eigen_assert(idx % m_impl.dimensions()[i] == 0);
        } else {
          inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
        }
      }
      index -= idx * m_outputStrides[i];
    }
    if (internal::index_statically_eq<Broadcast>(NumDims - 1, 1)) {
      eigen_assert(index < m_impl.dimensions()[NumDims - 1]);
      inputIndex += index;
    } else {
      if (internal::index_statically_eq<InputDimensions>(NumDims - 1, 1)) {
        eigen_assert(index % m_impl.dimensions()[NumDims - 1] == 0);
      } else {
        inputIndex += (index % m_impl.dimensions()[NumDims - 1]);
      }
    }
    return inputIndex;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffRowMajor(Index index) const
  {
    return m_impl.coeff(indexRowMajor(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const
  {
    if (internal::is_input_scalar<typename internal::remove_all<InputDimensions>::type>::value) {
      return internal::pset1<PacketReturnType>(m_impl.coeff(0));
    }

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      if (isCopy) {
        #ifdef EIGEN_GPU_COMPILE_PHASE
        // See PR 437: on NVIDIA P100 and K20m we observed a 3-4x speed up by enforcing
        // unaligned loads here. The reason is unclear though.
        return m_impl.template packet<Unaligned>(index);
        #else
        return m_impl.template packet<LoadMode>(index);
        #endif
      } else if (oneByN && !nByOne) {
        return packetNByOne<LoadMode>(index);
      } else if (!oneByN && nByOne) {
        return packetOneByN<LoadMode>(index);
      } else if (oneByN && nByOne) {
        return packetOneByNByOne<LoadMode>(index);
      } else {
        return packetColMajor<LoadMode>(index);
      }
    } else {
      if (isCopy) {
        #ifdef EIGEN_GPU_COMPILE_PHASE
        // See above.
        return m_impl.template packet<Unaligned>(index);
        #else
        return m_impl.template packet<LoadMode>(index);
        #endif
      } else if (oneByN && !nByOne) {
        return packetOneByN<LoadMode>(index);
      } else if (!oneByN && nByOne) {
        return packetNByOne<LoadMode>(index);
      } else if (oneByN && nByOne) {
        return packetOneByNByOne<LoadMode>(index);
      } else {
        return packetRowMajor<LoadMode>(index);
      }
    }
  }

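  // Added commentary: the helpers below implement the vectorized paths
  // dispatched above. packetOneByN / packetNByOne handle inputs that are
  // degenerate along one end, packetOneByNByOne handles the '[1, N..., 1]'
  // layout, and packetColMajor / packetRowMajor are the general gather paths.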
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetOneByNByOne
  (Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
    Index startDim, endDim;
    Index inputIndex, outputOffset, batchedIndex;

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      startDim = NumDims - 1;
      endDim = 1;
    } else {
      startDim = 0;
      endDim = NumDims - 2;
    }

    batchedIndex = index % m_outputStrides[startDim];
    inputIndex   = batchedIndex / m_outputStrides[endDim];
    outputOffset = batchedIndex % m_outputStrides[endDim];

    if (outputOffset + PacketSize <= m_outputStrides[endDim]) {
      values[0] = m_impl.coeff(inputIndex);
      return internal::pload1<PacketReturnType>(values);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) {
        if (outputOffset + cur < m_outputStrides[endDim]) {
          values[i] = m_impl.coeff(inputIndex);
        } else {
          ++inputIndex;
          inputIndex = (inputIndex == m_inputStrides[startDim] ? 0 : inputIndex);
          values[i] = m_impl.coeff(inputIndex);
          outputOffset = 0;
          cur = 0;
        }
      }
      return internal::pload<PacketReturnType>(values);
    }
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetOneByN(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    Index dim, inputIndex;

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      dim = NumDims - 1;
    } else {
      dim = 0;
    }

    inputIndex = index % m_inputStrides[dim];
    if (inputIndex + PacketSize <= m_inputStrides[dim]) {
      return m_impl.template packet<Unaligned>(inputIndex);
    } else {
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) {
        if (inputIndex > m_inputStrides[dim]-1) {
          inputIndex = 0;
        }
        values[i] = m_impl.coeff(inputIndex++);
      }
      return internal::pload<PacketReturnType>(values);
    }
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetNByOne(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
    Index dim, inputIndex, outputOffset;

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      dim = 1;
    } else {
      dim = NumDims - 2;
    }

    inputIndex   = index / m_outputStrides[dim];
    outputOffset = index % m_outputStrides[dim];
    if (outputOffset + PacketSize <= m_outputStrides[dim]) {
      values[0] = m_impl.coeff(inputIndex);
      return internal::pload1<PacketReturnType>(values);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) {
        if (outputOffset + cur < m_outputStrides[dim]) {
          values[i] = m_impl.coeff(inputIndex);
        } else {
          values[i] = m_impl.coeff(++inputIndex);
          outputOffset = 0;
          cur = 0;
        }
      }
      return internal::pload<PacketReturnType>(values);
    }
  }

  // Ignore the LoadMode and always use unaligned loads since we can't guarantee
  // the alignment at compile time.
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    const Index originalIndex = index;

    Index inputIndex = 0;
    EIGEN_UNROLL_LOOP
    for (int i = NumDims - 1; i > 0; --i) {
      const Index idx = index / m_outputStrides[i];
      if (internal::index_statically_eq<Broadcast>(i, 1)) {
        eigen_assert(idx < m_impl.dimensions()[i]);
        inputIndex += idx * m_inputStrides[i];
      } else {
        if (internal::index_statically_eq<InputDimensions>(i, 1)) {
          eigen_assert(idx % m_impl.dimensions()[i] == 0);
        } else {
          inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
        }
      }
      index -= idx * m_outputStrides[i];
    }
    Index innermostLoc;
    if (internal::index_statically_eq<Broadcast>(0, 1)) {
      eigen_assert(index < m_impl.dimensions()[0]);
      innermostLoc = index;
    } else {
      if (internal::index_statically_eq<InputDimensions>(0, 1)) {
        eigen_assert(index % m_impl.dimensions()[0] == 0);
        innermostLoc = 0;
      } else {
        innermostLoc = index % m_impl.dimensions()[0];
      }
    }
    inputIndex += innermostLoc;

    // Todo: this could be extended to the second dimension if we're not
    // broadcasting alongside the first dimension, and so on.
    if (innermostLoc + PacketSize <= m_impl.dimensions()[0]) {
      return m_impl.template packet<Unaligned>(inputIndex);
    } else {
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      values[0] = m_impl.coeff(inputIndex);
      EIGEN_UNROLL_LOOP
      for (int i = 1; i < PacketSize; ++i) {
        if (innermostLoc + i < m_impl.dimensions()[0]) {
          values[i] = m_impl.coeff(inputIndex+i);
        } else {
          values[i] = coeffColMajor(originalIndex+i);
        }
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    const Index originalIndex = index;

    Index inputIndex = 0;
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < NumDims - 1; ++i) {
      const Index idx = index / m_outputStrides[i];
      if (internal::index_statically_eq<Broadcast>(i, 1)) {
        eigen_assert(idx < m_impl.dimensions()[i]);
        inputIndex += idx * m_inputStrides[i];
      } else {
        if (internal::index_statically_eq<InputDimensions>(i, 1)) {
          eigen_assert(idx % m_impl.dimensions()[i] == 0);
        } else {
          inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
        }
      }
      index -= idx * m_outputStrides[i];
    }
    Index innermostLoc;
    if (internal::index_statically_eq<Broadcast>(NumDims - 1, 1)) {
      eigen_assert(index < m_impl.dimensions()[NumDims-1]);
      innermostLoc = index;
    } else {
      if (internal::index_statically_eq<InputDimensions>(NumDims - 1, 1)) {
        eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0);
        innermostLoc = 0;
      } else {
        innermostLoc = index % m_impl.dimensions()[NumDims-1];
      }
    }
    inputIndex += innermostLoc;

    // Todo: this could be extended to the second dimension if we're not
    // broadcasting alongside the first dimension, and so on.
    if (innermostLoc + PacketSize <= m_impl.dimensions()[NumDims-1]) {
      return m_impl.template packet<Unaligned>(inputIndex);
    } else {
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
      values[0] = m_impl.coeff(inputIndex);
      EIGEN_UNROLL_LOOP
      for (int i = 1; i < PacketSize; ++i) {
        if (innermostLoc + i < m_impl.dimensions()[NumDims-1]) {
          values[i] = m_impl.coeff(inputIndex+i);
        } else {
          values[i] = coeffRowMajor(originalIndex+i);
        }
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    double compute_cost = TensorOpCost::AddCost<Index>();
    if (!isCopy && NumDims > 0) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        compute_cost += TensorOpCost::DivCost<Index>();
        if (internal::index_statically_eq<Broadcast>(i, 1)) {
          compute_cost +=
              TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
        } else {
          if (!internal::index_statically_eq<InputDimensions>(i, 1)) {
            compute_cost += TensorOpCost::MulCost<Index>() +
                            TensorOpCost::ModCost<Index>() +
                            TensorOpCost::AddCost<Index>();
          }
        }
        compute_cost +=
            TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
      }
    }
    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::TensorBlockResourceRequirements
  getResourceRequirements() const {
    // TODO(wuke): Targeting L1 size is 30% faster than targeting L{-1} on large
    // tensors. But this might need further tuning.
    const size_t target_size = m_device.firstLevelCacheSize();
    return internal::TensorBlockResourceRequirements::merge(
        m_impl.getResourceRequirements(),
        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
          bool /*root_of_expr_ast*/ = false) const {
    BlockBroadcastingParams params = blockBroadcastingParams(desc);

    if (params.inner_dim_size == 0 || params.bcast_dim_size == 0) {
      return emptyBlock();
    }

    // Prepare storage for the materialized broadcasting result.
    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch);
    ScalarNoConst* materialized_output = block_storage.data();

    // We potentially will need to materialize input blocks.
    size_t materialized_input_size = 0;
    ScalarNoConst* materialized_input = NULL;

    // Initialize block broadcasting iterator state for outer dimensions (outer
    // with regard to bcast dimension). Dimensions in this array are always in
    // inner_most -> outer_most order (col major layout).
    array<BlockBroadcastingIteratorState, NumDims> it;
    int idx = 0;

    for (int i = params.inner_dim_count + 1; i < NumDims; ++i) {
      const Index dim = IsColMajor ? i : NumDims - 1 - i;
      it[idx].size = params.output_dims[dim];
      it[idx].count = 0;
      it[idx].output_stride = m_outputStrides[dim];
      it[idx].output_span = it[idx].output_stride * (it[idx].size - 1);
      idx++;
    }

    // Write output into the beginning of `materialized_output`.
    Index output_offset = 0;

    // We will fill output block by broadcasting along the bcast dim, and
    // iterating over outer dimension.
    const Index output_size = NumDims == 0 ? 1 : params.output_dims.TotalSize();

    for (Index num_output_coeffs = 0; num_output_coeffs < output_size;) {
      ScalarNoConst* bcast_output = materialized_output + num_output_coeffs;
      Index bcast_offset = desc.offset() + output_offset;

      // Broadcast along the bcast dimension.
      num_output_coeffs += BroadcastBlockAlongBcastDim(
          params, bcast_offset, scratch, bcast_output, &materialized_input,
          &materialized_input_size);

      // Switch to the next outer dimension.
      for (int j = 0; j < idx; ++j) {
        if (++it[j].count < it[j].size) {
          output_offset += it[j].output_stride;
          break;
        }
        it[j].count = 0;
        output_offset -= it[j].output_span;
      }
    }

    return block_storage.AsTensorMaterializedBlock();
  }

  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

  const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

  Broadcast functor() const { return m_broadcast; }
#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_STRONG_INLINE void bind(cl::sycl::handler& cgh) const {
    m_impl.bind(cgh);
  }
#endif
 private:
  static const bool IsColMajor =
      static_cast<int>(Layout) == static_cast<int>(ColMajor);

  // We will build a general case block broadcasting on top of broadcasting
  // primitive that will do broadcasting only for the inner dimension(s) along
  // the first dimension smaller than the input size (it's called `bcast_dim`).
  //
  // Example:
  //   dim:        0  1  2   (ColMajor)
  //   input size: [9, 3, 6]
  //   block size: [9, 2, 6]
  //
  // We will compute broadcasted block by iterating over the outer dimensions
  // before `bcast_dim` (only dimension `2` in this example) and computing
  // broadcasts along the `bcast_dim` (dimension `1` in this example).

  // BlockBroadcastingParams holds precomputed parameters for broadcasting a
  // single block along the broadcasting dimension. Sizes and strides along the
  // `bcast_dim` might be invalid, they will be adjusted later in
  // `BroadcastBlockAlongBcastDim`.
  struct BlockBroadcastingParams {
    Dimensions input_dims;      // input expression dimensions
    Dimensions output_dims;     // output block sizes
    Dimensions output_strides;  // output block strides

    int inner_dim_count;   // count inner dimensions matching in size
    int bcast_dim;         // broadcasting dimension index
    Index bcast_dim_size;  // broadcasting dimension size
    Index inner_dim_size;  // inner dimensions size

    // Block sizes and strides for the input block where all dimensions before
    // `bcast_dim` are equal to `1`.
    Dimensions input_block_sizes;
    Dimensions input_block_strides;

    // Block sizes and strides for blocks with extra dimensions and strides `0`.
    BroadcastDimensions bcast_block_sizes;
    BroadcastDimensions bcast_block_strides;
    BroadcastDimensions bcast_input_strides;
  };

  struct BlockBroadcastingIteratorState {
    Index size;
    Index count;
    Index output_stride;
    Index output_span;
  };

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlockBroadcastingParams
  blockBroadcastingParams(TensorBlockDesc& desc) const {
    BlockBroadcastingParams params;

    params.input_dims = Dimensions(m_impl.dimensions());

    // Output block sizes and strides.
    params.output_dims = desc.dimensions();
    params.output_strides = internal::strides<Layout>(params.output_dims);

    // Find the broadcasting dimension (first dimension with output size smaller
    // than the input size).
    params.bcast_dim = 0;
    params.bcast_dim_size = 1;
    params.inner_dim_size = 1;

    // Count the number of inner dimensions that have the same size in the block
    // and in the broadcast expression.
    params.inner_dim_count = 0;

    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;

      if (params.output_dims[dim] == m_dimensions[dim]) {
        params.inner_dim_size *= params.output_dims[dim];
        ++params.inner_dim_count;
        continue;
      }

      // First non-matching dimension is the broadcasting dimension.
      eigen_assert(params.output_dims[dim] < m_dimensions[dim]);
      params.bcast_dim = dim;
      params.bcast_dim_size = params.output_dims[dim];
      break;
    }

    // Calculate the input block size for looking into the input.
    for (int i = 0; i < params.inner_dim_count; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      params.input_block_sizes[dim] = params.input_dims[dim];
    }
    for (int i = params.inner_dim_count; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      params.input_block_sizes[dim] = 1;
    }
    params.input_block_strides =
        internal::strides<Layout>(params.input_block_sizes);

    // Broadcast with the 0-stride trick: Create 1 extra dim for each
    // broadcast, set the input stride to 0.
    //
    // When ColMajor:
    //
    // - bcast_block_sizes:
    //   [d_0, b_0, d_1, b_1, ...]
    //
    // - bcast_block_strides:
    //   [output_block_strides[0], output_block_strides[0] * d_0,
    //    output_block_strides[1], output_block_strides[1] * d_1,
    //   ...]
    //
    // - bcast_input_strides:
    //   [input_block_strides[0], 0,
    //    input_block_strides[1], 0,
    //   ...].
    //
    for (int i = 0; i < params.inner_dim_count; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;

      const int copy_dim = IsColMajor ? 2 * i : 2 * NumDims - 2 * i - 1;
      const int broadcast_dim = IsColMajor ? copy_dim + 1 : copy_dim - 1;

      params.bcast_block_sizes[copy_dim] = params.input_dims[dim];
      params.bcast_block_sizes[broadcast_dim] = m_broadcast[dim];
      params.bcast_block_strides[copy_dim] = params.output_strides[dim];
      params.bcast_block_strides[broadcast_dim] =
          params.output_strides[dim] * params.input_dims[dim];
      params.bcast_input_strides[copy_dim] = params.input_block_strides[dim];
      params.bcast_input_strides[broadcast_dim] = 0;
    }

    for (int i = 2 * params.inner_dim_count; i < 2 * NumDims; ++i) {
      const int dim = IsColMajor ? i : 2 * NumDims - i - 1;
      params.bcast_block_sizes[dim] = 1;
      params.bcast_block_strides[dim] = 0;
      params.bcast_input_strides[dim] = 0;
    }

    return params;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock emptyBlock() const {
    DSizes<Index, NumDims> dimensions;
    for (int i = 0; i < NumDims; ++i) dimensions[i] = 0;
    return TensorBlock(internal::TensorBlockKind::kView, NULL, dimensions);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index BroadcastBlockAlongBcastDim(
      BlockBroadcastingParams params, Index bcast_offset,
      TensorBlockScratch& scratch, ScalarNoConst* materialized_output,
      ScalarNoConst** materialized_input,
      size_t* materialized_input_size) const {
    if (params.bcast_dim_size == 1) {
      // We just need one block read using the ready-set values above.
      return BroadcastBlock(
          params.input_block_sizes, params.input_block_strides,
          params.bcast_block_sizes, params.bcast_block_strides,
          params.bcast_input_strides, bcast_offset, 0, scratch,
          materialized_output, materialized_input, materialized_input_size);

    } else if (params.input_dims[params.bcast_dim] == 1) {
      // Broadcast bcast dimension (< NumDims) by bcast_dim_size.
      const int broadcast_bcast_dim =
          IsColMajor ? 2 * params.inner_dim_count + 1
                     : 2 * NumDims - 2 * params.inner_dim_count - 2;

      params.bcast_block_sizes[broadcast_bcast_dim] = params.bcast_dim_size;
      params.bcast_input_strides[broadcast_bcast_dim] = 0;
      params.bcast_block_strides[broadcast_bcast_dim] =
          params.output_strides[params.bcast_dim];

      return BroadcastBlock(
          params.input_block_sizes, params.input_block_strides,
          params.bcast_block_sizes, params.bcast_block_strides,
          params.bcast_input_strides, bcast_offset, 0, scratch,
          materialized_output, materialized_input, materialized_input_size);

    } else {
      // Keep track of the total number of the coefficients written to the
      // output block.
      Index num_output_coeffs = 0;

      // The general case. Let's denote the output block as
      //
      //   x[..., a:a+bcast_dim_size, :, ..., :]
      //
      // where a:a+bcast_dim_size is a slice on the bcast_dim dimension
      // (< NumDims). We need to split the a:a+bcast_dim_size into possibly 3
      // sub-blocks:
      //
      // (1) a:b, where b is the smallest multiple of
      //     input_dims[bcast_dim_start] in [a, a+bcast_dim_size].
      //
      // (2) b:c, where c is the largest multiple of input_dims[bcast_dim_start]
      //     in [a, a+bcast_dim_size].
      //
      // (3) c:a+bcast_dim_size .
      //
      // Or, when b and c do not exist, we just need to process the whole block
      // together.
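      // Added numeric sketch (hypothetical values): with
      // input_dims[bcast_dim] = 5 and an output slice a:a+bcast_dim_size =
      // 3:14, we get b = 5 and c = 10, so the three sub-blocks are
      // 3:5 (head), 5:10 (one full input period) and 10:14 (tail).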

      // Find a.
      const Index bcast_dim_left_index =
          bcast_offset / m_outputStrides[params.bcast_dim];

      // Find b and c.
      const Index input_bcast_dim_size = params.input_dims[params.bcast_dim];

      // First multiple after a. This is b when <= bcast_dim_left_index +
      // bcast_dim_size.
      const Index first_multiple =
          divup<Index>(bcast_dim_left_index, input_bcast_dim_size) *
          input_bcast_dim_size;

      if (first_multiple <= bcast_dim_left_index + params.bcast_dim_size) {
        // b exists, so does c. Find it.
        const Index last_multiple =
            (bcast_dim_left_index + params.bcast_dim_size) /
            input_bcast_dim_size * input_bcast_dim_size;
        const int copy_bcast_dim =
            IsColMajor ? 2 * params.inner_dim_count
                       : 2 * NumDims - 2 * params.inner_dim_count - 1;
        const int broadcast_bcast_dim =
            IsColMajor ? 2 * params.inner_dim_count + 1
                       : 2 * NumDims - 2 * params.inner_dim_count - 2;

        if (first_multiple > bcast_dim_left_index) {
          const Index head_size = first_multiple - bcast_dim_left_index;
          params.input_block_sizes[params.bcast_dim] = head_size;
          params.bcast_block_sizes[copy_bcast_dim] = head_size;
          params.bcast_input_strides[copy_bcast_dim] =
              params.input_block_strides[params.bcast_dim];
          params.bcast_block_strides[copy_bcast_dim] =
              params.output_strides[params.bcast_dim];
          params.bcast_block_sizes[broadcast_bcast_dim] = 1;
          params.bcast_input_strides[broadcast_bcast_dim] = 0;
          params.bcast_block_strides[broadcast_bcast_dim] =
              params.output_strides[params.bcast_dim] *
              params.input_dims[params.bcast_dim];

          num_output_coeffs += BroadcastBlock(
              params.input_block_sizes, params.input_block_strides,
              params.bcast_block_sizes, params.bcast_block_strides,
              params.bcast_input_strides, bcast_offset, 0, scratch,
              materialized_output, materialized_input, materialized_input_size);
        }
        if (first_multiple < last_multiple) {
          params.input_block_sizes[params.bcast_dim] = input_bcast_dim_size;
          params.bcast_block_sizes[copy_bcast_dim] = input_bcast_dim_size;
          params.bcast_input_strides[copy_bcast_dim] =
              params.input_block_strides[params.bcast_dim];
          params.bcast_block_strides[copy_bcast_dim] =
              params.output_strides[params.bcast_dim];
          params.bcast_block_sizes[broadcast_bcast_dim] =
              (last_multiple - first_multiple) / input_bcast_dim_size;
          params.bcast_input_strides[broadcast_bcast_dim] = 0;
          params.bcast_block_strides[broadcast_bcast_dim] =
              params.output_strides[params.bcast_dim] *
              params.input_dims[params.bcast_dim];
          const Index offset = (first_multiple - bcast_dim_left_index) *
                               m_outputStrides[params.bcast_dim];

          num_output_coeffs += BroadcastBlock(
              params.input_block_sizes, params.input_block_strides,
              params.bcast_block_sizes, params.bcast_block_strides,
              params.bcast_input_strides, bcast_offset, offset, scratch,
              materialized_output, materialized_input, materialized_input_size);
        }
        if (last_multiple < bcast_dim_left_index + params.bcast_dim_size) {
          const Index tail_size =
              bcast_dim_left_index + params.bcast_dim_size - last_multiple;
          params.input_block_sizes[params.bcast_dim] = tail_size;
          params.bcast_block_sizes[copy_bcast_dim] = tail_size;
          params.bcast_input_strides[copy_bcast_dim] =
              params.input_block_strides[params.bcast_dim];
          params.bcast_block_strides[copy_bcast_dim] =
              params.output_strides[params.bcast_dim];
          params.bcast_block_sizes[broadcast_bcast_dim] = 1;
          params.bcast_input_strides[broadcast_bcast_dim] = 0;
          params.bcast_block_strides[broadcast_bcast_dim] =
              params.output_strides[params.bcast_dim] *
              params.input_dims[params.bcast_dim];
          const Index offset = (last_multiple - bcast_dim_left_index) *
                               m_outputStrides[params.bcast_dim];

          num_output_coeffs += BroadcastBlock(
              params.input_block_sizes, params.input_block_strides,
              params.bcast_block_sizes, params.bcast_block_strides,
              params.bcast_input_strides, bcast_offset, offset, scratch,
              materialized_output, materialized_input, materialized_input_size);
        }
      } else {
        // b and c do not exist.
        const int copy_bcast_dim =
            IsColMajor ? 2 * params.inner_dim_count
                       : 2 * NumDims - 2 * params.inner_dim_count - 1;
        params.input_block_sizes[params.bcast_dim] = params.bcast_dim_size;
        params.bcast_block_sizes[copy_bcast_dim] = params.bcast_dim_size;
        params.bcast_input_strides[copy_bcast_dim] =
            params.input_block_strides[params.bcast_dim];
        params.bcast_block_strides[copy_bcast_dim] =
            params.output_strides[params.bcast_dim];

        num_output_coeffs += BroadcastBlock(
            params.input_block_sizes, params.input_block_strides,
            params.bcast_block_sizes, params.bcast_block_strides,
            params.bcast_input_strides, bcast_offset, 0, scratch,
            materialized_output, materialized_input, materialized_input_size);
      }

      return num_output_coeffs;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index BroadcastBlock(
      const Dimensions& input_block_sizes,
      const Dimensions& input_block_strides,
      const BroadcastDimensions& bcast_block_sizes,
      const BroadcastDimensions& bcast_block_strides,
      const BroadcastDimensions& bcast_input_strides, Index bcast_offset,
      Index offset, TensorBlockScratch& scratch,
      ScalarNoConst* materialized_output, ScalarNoConst** materialized_input,
      size_t* materialized_input_size) const {
    // ---------------------------------------------------------------------- //
    // Tensor block descriptor for reading block from the input.
    const Index input_offset = bcast_offset + offset;
    TensorBlockDesc input_desc(
        IsColMajor ? indexColMajor(input_offset) : indexRowMajor(input_offset),
        input_block_sizes);

    ArgTensorBlock input_block = m_impl.block(input_desc, scratch);

    // ---------------------------------------------------------------------- //
    // Materialize input block into a temporary memory buffer only if it's not
    // already available in the arg block.
    const ScalarNoConst* input_buffer = NULL;

    if (input_block.data() != NULL) {
      // Input block already has raw data, there is no need to materialize it.
      input_buffer = input_block.data();

    } else {
      // Otherwise we have to do block assignment into a temporary buffer.

      // Maybe reuse previously allocated buffer, or allocate a new one with a
      // scratch allocator.
      const size_t input_total_size = input_block_sizes.TotalSize();
      if (*materialized_input == NULL ||
          *materialized_input_size < input_total_size) {
        *materialized_input_size = input_total_size;
        void* mem = scratch.allocate(*materialized_input_size * sizeof(Scalar));
        *materialized_input = static_cast<ScalarNoConst*>(mem);
      }

      typedef internal::TensorBlockAssignment<
          ScalarNoConst, NumDims, typename ArgTensorBlock::XprType, Index>
          TensorBlockAssignment;

      TensorBlockAssignment::Run(
          TensorBlockAssignment::target(input_block_sizes, input_block_strides,
                                        *materialized_input),
          input_block.expr());

      input_buffer = *materialized_input;
    }

    // ---------------------------------------------------------------------- //
    // Copy data from materialized input block to the materialized output, using
    // given broadcast strides (strides with zeroes).
    typedef internal::TensorBlockIO<ScalarNoConst, Index, 2 * NumDims, Layout>
        TensorBlockIO;

    typename TensorBlockIO::Src src(bcast_input_strides, input_buffer);
    typename TensorBlockIO::Dst dst(bcast_block_sizes, bcast_block_strides,
                                    materialized_output + offset);

    return TensorBlockIO::Copy(dst, src);
  }

 protected:
  const Device EIGEN_DEVICE_REF m_device;
  const typename internal::remove_reference<Broadcast>::type m_broadcast;
  Dimensions m_dimensions;
  array<Index, NumDims> m_outputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
};


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H