TensorPadding.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
#define EIGEN_CXX11_TENSOR_TENSOR_PADDING_H

namespace Eigen {

/** \class TensorPadding
  * \ingroup CXX11_Tensor_Module
  *
  * \brief Tensor padding class.
  * At the moment only padding with a constant value is supported.
  *
  */
namespace internal {
template<typename PaddingDimensions, typename XprType>
struct traits<TensorPaddingOp<PaddingDimensions, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};

template<typename PaddingDimensions, typename XprType>
struct eval<TensorPaddingOp<PaddingDimensions, XprType>, Eigen::Dense>
{
  typedef const TensorPaddingOp<PaddingDimensions, XprType>& type;
};

template<typename PaddingDimensions, typename XprType>
struct nested<TensorPaddingOp<PaddingDimensions, XprType>, 1, typename eval<TensorPaddingOp<PaddingDimensions, XprType> >::type>
{
  typedef TensorPaddingOp<PaddingDimensions, XprType> type;
};

}  // end namespace internal


template<typename PaddingDimensions, typename XprType>
class TensorPaddingOp : public TensorBase<TensorPaddingOp<PaddingDimensions, XprType>, ReadOnlyAccessors>
{
  public:
    typedef typename Eigen::internal::traits<TensorPaddingOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorPaddingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorPaddingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorPaddingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPaddingOp(const XprType& expr, const PaddingDimensions& padding_dims, const Scalar padding_value)
        : m_xpr(expr), m_padding_dims(padding_dims), m_padding_value(padding_value) {}

    EIGEN_DEVICE_FUNC
    const PaddingDimensions& padding() const { return m_padding_dims; }
    EIGEN_DEVICE_FUNC
    Scalar padding_value() const { return m_padding_value; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
    const PaddingDimensions m_padding_dims;
    const Scalar m_padding_value;
};
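
// Usage sketch (illustrative, not part of the upstream header): a TensorPaddingOp
// is normally created through TensorBase::pad() rather than constructed directly.
// Assuming a rank-2 float tensor, a hypothetical snippet would look like:
//
//   Eigen::Tensor<float, 2> input(3, 4);
//   input.setRandom();
//   Eigen::array<std::pair<int, int>, 2> paddings;
//   paddings[0] = std::make_pair(1, 2);   // 1 element before, 2 after dimension 0
//   paddings[1] = std::make_pair(0, 1);   // 0 elements before, 1 after dimension 1
//   Eigen::Tensor<float, 2> zero_padded  = input.pad(paddings);         // pads with Scalar(0)
//   Eigen::Tensor<float, 2> value_padded = input.pad(paddings, -1.0f);  // custom padding value
//
// The result has dimensions (3+1+2) x (4+0+1) = 6 x 5.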


// Eval as rvalue
template<typename PaddingDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device>
{
  typedef TensorPaddingOp<PaddingDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<PaddingDimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static const int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum {
    IsAligned = true,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = TensorEvaluator<ArgType, Device>::RawAccess,
    PreferBlockAccess = true,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = true,
    RawAccess = false
  };

  typedef typename internal::remove_const<Scalar>::type ScalarNoConst;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_padding(op.padding()), m_paddingValue(op.padding_value()), m_device(device)
  {
    // The padding op doesn't change the rank of the tensor. Directly padding a scalar would lead
    // to a vector, which doesn't make sense. Instead one should reshape the scalar into a vector
    // of 1 element first and then pad.
    EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);

    // Compute dimensions
    m_dimensions = m_impl.dimensions();
    for (int i = 0; i < NumDims; ++i) {
      m_dimensions[i] += m_padding[i].first + m_padding[i].second;
    }
    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
      }
      m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1];
    } else {
      m_inputStrides[NumDims - 1] = 1;
      m_outputStrides[NumDims] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
        m_outputStrides[i+1] = m_outputStrides[i+2] * m_dimensions[i+1];
      }
      m_outputStrides[0] = m_outputStrides[1] * m_dimensions[0];
    }
  }
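
  // Worked example of the stride setup above (illustrative, ColMajor): padding a
  // 3 x 4 input by {(1,2), (0,1)} gives a 6 x 5 output, so
  //   m_inputStrides  = {1, 3}       (strides of the input tensor)
  //   m_outputStrides = {1, 6, 30}   (output strides plus the total output size)
  // The extra trailing entry lets the packet paths use m_outputStrides[i+1]
  // (ColMajor) as the exclusive upper bound of the index range of dimension i.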

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS

  EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    eigen_assert(index < dimensions().TotalSize());
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_outputStrides[i];
        if (isPaddingAtIndexForDim(idx, i)) {
          return m_paddingValue;
        }
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      if (isPaddingAtIndexForDim(index, 0)) {
        return m_paddingValue;
      }
      inputIndex += (index - m_padding[0].first);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_outputStrides[i+1];
        if (isPaddingAtIndexForDim(idx, i)) {
          return m_paddingValue;
        }
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i+1];
      }
      if (isPaddingAtIndexForDim(index, NumDims-1)) {
        return m_paddingValue;
      }
      inputIndex += (index - m_padding[NumDims-1].first);
    }
    return m_impl.coeff(inputIndex);
  }
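
  // Worked example for coeff() (illustrative, ColMajor, same 6 x 5 setup as in
  // the constructor comment above, i.e. a 3 x 4 input padded by {(1,2), (0,1)}):
  // for output index 13,
  //   idx = 13 / m_outputStrides[1] = 13 / 6 = 2   -> column 2 is not padded,
  //   inputIndex += (2 - 0) * m_inputStrides[1] = 2 * 3 = 6,
  //   index -= 2 * 6 = 1                           -> row 1 is not padded,
  //   inputIndex += (1 - 1) = 0,
  // so the padded coefficient (1, 2) is read from input coefficient 6, i.e. input (0, 2).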

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return packetColMajor(index);
    }
    return packetRowMajor(index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    TensorOpCost cost = m_impl.costPerCoeff(vectorized);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims; ++i)
        updateCostPerDimension(cost, i, i == 0);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i >= 0; --i)
        updateCostPerDimension(cost, i, i == NumDims - 1);
    }
    return cost;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    return internal::TensorBlockResourceRequirements::merge(
        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
        m_impl.getResourceRequirements());
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    // If one of the dimensions is zero, return empty block view.
    if (desc.size() == 0) {
      return TensorBlock(internal::TensorBlockKind::kView, NULL,
                         desc.dimensions());
    }

    static const bool IsColMajor = Layout == static_cast<int>(ColMajor);
    const int inner_dim_idx = IsColMajor ? 0 : NumDims - 1;

    Index offset = desc.offset();

    // Compute offsets in the output tensor corresponding to the desc.offset().
    DSizes<Index, NumDims> output_offsets;
    for (int i = NumDims - 1; i > 0; --i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      const int stride_dim = IsColMajor ? dim : dim + 1;
      output_offsets[dim] = offset / m_outputStrides[stride_dim];
      offset -= output_offsets[dim] * m_outputStrides[stride_dim];
    }
    output_offsets[inner_dim_idx] = offset;

    // Offsets in the input corresponding to output offsets.
    DSizes<Index, NumDims> input_offsets = output_offsets;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      input_offsets[dim] = input_offsets[dim] - m_padding[dim].first;
    }

    // Compute the offset in the input buffer. At this point it might be illegal
    // and point outside of the input buffer, because we don't check for negative
    // offsets; this is corrected in the block iteration loop below.
    Index input_offset = 0;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      input_offset += input_offsets[dim] * m_inputStrides[dim];
    }

    // Destination buffer and scratch buffer both indexed from 0 and have the
    // same dimensions as the requested block (for destination buffer this
    // property is guaranteed by `desc.destination()`).
    Index output_offset = 0;
    const DSizes<Index, NumDims> output_strides =
        internal::strides<Layout>(desc.dimensions());

    // NOTE(ezhulenev): We initialize block iteration state for `NumDims - 1`
    // dimensions, skipping the innermost dimension. In theory it should be possible
    // to squeeze matching innermost dimensions, however in practice that did
    // not show any improvements in benchmarks. Also in practice the first outer
    // dimension usually has padding, and will prevent squeezing.

    // Initialize output block iterator state. Dimensions in this array are
    // always in inner_most -> outer_most order (col major layout).
    array<BlockIteratorState, NumDims - 1> it;
    for (int i = 0; i < NumDims - 1; ++i) {
      const int dim = IsColMajor ? i + 1 : NumDims - i - 2;
      it[i].count = 0;
      it[i].size = desc.dimension(dim);

      it[i].input_stride = m_inputStrides[dim];
      it[i].input_span = it[i].input_stride * (it[i].size - 1);

      it[i].output_stride = output_strides[dim];
      it[i].output_span = it[i].output_stride * (it[i].size - 1);
    }

    const Index input_inner_dim_size =
        static_cast<Index>(m_impl.dimensions()[inner_dim_idx]);

    // Total output size.
    const Index output_size = desc.size();

    // We will fill inner dimension of this size in the output. It might be
    // larger than the inner dimension in the input, so we might have to pad
    // before/after we copy values from the input inner dimension.
    const Index output_inner_dim_size = desc.dimension(inner_dim_idx);

    // How many values to fill with padding BEFORE reading from the input inner
    // dimension.
    const Index output_inner_pad_before_size =
        input_offsets[inner_dim_idx] < 0
            ? numext::mini(numext::abs(input_offsets[inner_dim_idx]),
                           output_inner_dim_size)
            : 0;

    // How many values we can actually copy from the input inner dimension.
    const Index output_inner_copy_size = numext::mini(
        // Want to copy from input.
        (output_inner_dim_size - output_inner_pad_before_size),
        // Can copy from input.
        numext::maxi(input_inner_dim_size - (input_offsets[inner_dim_idx] +
                                             output_inner_pad_before_size),
                     Index(0)));

    eigen_assert(output_inner_copy_size >= 0);

    // How many values to fill with padding AFTER reading from the input inner
    // dimension.
    const Index output_inner_pad_after_size =
        (output_inner_dim_size - output_inner_copy_size -
         output_inner_pad_before_size);

    // Sanity check, sum of all sizes must be equal to the output size.
    eigen_assert(output_inner_dim_size ==
                 (output_inner_pad_before_size + output_inner_copy_size +
                  output_inner_pad_after_size));

    // Keep track of current coordinates and padding in the output.
    DSizes<Index, NumDims> output_coord = output_offsets;
    DSizes<Index, NumDims> output_padded;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
    }

    typedef internal::StridedLinearBufferCopy<ScalarNoConst, Index> LinCopy;

    // Prepare storage for the materialized padding result.
    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch);

    // TODO(ezhulenev): Squeeze multiple non-padded inner dimensions into a
    // single logical inner dimension.

    // When possible we squeeze writes for the innermost (only if non-padded)
    // dimension with the first padded dimension. This allows us to reduce the
    // number of calls to LinCopy and to better utilize vector instructions.
    const bool squeeze_writes =
        NumDims > 1 &&
        // inner dimension is not padded
        (input_inner_dim_size == m_dimensions[inner_dim_idx]) &&
        // and equal to the block inner dimension
        (input_inner_dim_size == output_inner_dim_size);

    const int squeeze_dim = IsColMajor ? inner_dim_idx + 1 : inner_dim_idx - 1;

    // Maximum coordinate on a squeeze dimension that we can write to.
    const Index squeeze_max_coord =
        squeeze_writes ? numext::mini(
                             // max non-padded element in the input
                             static_cast<Index>(m_dimensions[squeeze_dim] -
                                                m_padding[squeeze_dim].second),
                             // max element in the output buffer
                             static_cast<Index>(output_offsets[squeeze_dim] +
                                                desc.dimension(squeeze_dim)))
                       : static_cast<Index>(0);

    // Iterate copying data from `m_impl.data()` to the output buffer.
    for (Index size = 0; size < output_size;) {
      // Detect if we are in the padded region (exclude innermost dimension).
      bool is_padded = false;
      for (int j = 1; j < NumDims; ++j) {
        const int dim = IsColMajor ? j : NumDims - j - 1;
        is_padded = output_padded[dim];
        if (is_padded) break;
      }

      if (is_padded) {
        // Fill single innermost dimension with padding value.
        size += output_inner_dim_size;

        LinCopy::template Run<LinCopy::Kind::FillLinear>(
            typename LinCopy::Dst(output_offset, 1, block_storage.data()),
            typename LinCopy::Src(0, 0, &m_paddingValue),
            output_inner_dim_size);

      } else if (squeeze_writes) {
        // Squeeze multiple reads from innermost dimensions.
        const Index squeeze_num = squeeze_max_coord - output_coord[squeeze_dim];
        size += output_inner_dim_size * squeeze_num;

        // Copy `squeeze_num` inner dimensions from input to output.
        LinCopy::template Run<LinCopy::Kind::Linear>(
            typename LinCopy::Dst(output_offset, 1, block_storage.data()),
            typename LinCopy::Src(input_offset, 1, m_impl.data()),
            output_inner_dim_size * squeeze_num);

        // Update iteration state for only `squeeze_num - 1` processed inner
        // dimensions, because we have another iteration state update at the end
        // of the loop that will update iteration state for the last inner
        // processed dimension.
        it[0].count += (squeeze_num - 1);
        input_offset += it[0].input_stride * (squeeze_num - 1);
        output_offset += it[0].output_stride * (squeeze_num - 1);
        output_coord[squeeze_dim] += (squeeze_num - 1);

      } else {
        // Single read from innermost dimension.
        size += output_inner_dim_size;

        {  // Fill with padding before copying from input inner dimension.
          const Index out = output_offset;

          LinCopy::template Run<LinCopy::Kind::FillLinear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(0, 0, &m_paddingValue),
              output_inner_pad_before_size);
        }

        {  // Copy data from input inner dimension.
          const Index out = output_offset + output_inner_pad_before_size;
          const Index in = input_offset + output_inner_pad_before_size;

          eigen_assert(output_inner_copy_size == 0 || m_impl.data() != NULL);

          LinCopy::template Run<LinCopy::Kind::Linear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(in, 1, m_impl.data()),
              output_inner_copy_size);
        }

        {  // Fill with padding after copying from input inner dimension.
          const Index out = output_offset + output_inner_pad_before_size +
                            output_inner_copy_size;

          LinCopy::template Run<LinCopy::Kind::FillLinear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(0, 0, &m_paddingValue),
              output_inner_pad_after_size);
        }
      }

      for (int j = 0; j < NumDims - 1; ++j) {
        const int dim = IsColMajor ? j + 1 : NumDims - j - 2;

        if (++it[j].count < it[j].size) {
          input_offset += it[j].input_stride;
          output_offset += it[j].output_stride;
          output_coord[dim] += 1;
          output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
          break;
        }
        it[j].count = 0;
        input_offset -= it[j].input_span;
        output_offset -= it[j].output_span;
        output_coord[dim] -= it[j].size - 1;
        output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
      }
    }

    return block_storage.AsTensorMaterializedBlock();
  }
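
  // Worked example of the inner-dimension split used in block() above
  // (illustrative): with an input inner dimension of size 3, padding (1, 2) on
  // that dimension, and a block covering the full padded inner dimension of
  // size 6 starting at offset 0, input_offsets[inner] = 0 - 1 = -1, so
  //   output_inner_pad_before_size = min(|-1|, 6)              = 1
  //   output_inner_copy_size       = min(6 - 1, max(3 - 0, 0)) = 3
  //   output_inner_pad_after_size  = 6 - 3 - 1                 = 2
  // which adds up to the 6 output values of the inner dimension.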

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return NULL; }

#ifdef EIGEN_USE_SYCL
  // binding placeholder accessors to a command group handler for SYCL
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void bind(cl::sycl::handler &cgh) const {
    m_impl.bind(cgh);
  }
#endif

  private:
  struct BlockIteratorState {
    BlockIteratorState()
        : count(0),
          size(0),
          input_stride(0),
          input_span(0),
          output_stride(0),
          output_span(0) {}

    Index count;
    Index size;
    Index input_stride;
    Index input_span;
    Index output_stride;
    Index output_span;
  };

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isPaddingAtIndexForDim(
      Index index, int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
    return (!internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0) &&
            index < m_padding[dim_index].first) ||
           (!internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0) &&
            index >= m_dimensions[dim_index] - m_padding[dim_index].second);
#else
    return (index < m_padding[dim_index].first) ||
           (index >= m_dimensions[dim_index] - m_padding[dim_index].second);
#endif
  }
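
  // For instance (illustrative), with m_dimensions[d] == 6 and m_padding[d] == (1, 2),
  // isPaddingAtIndexForDim(i, d) is true for i == 0 and for i >= 4, while
  // i in [1, 3] maps to input coordinates [0, 2]. When PaddingDimensions is an
  // index list with statically known zero padding, the corresponding comparison
  // is skipped at compile time.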

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isLeftPaddingCompileTimeZero(
      int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
    return internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0);
#else
    EIGEN_UNUSED_VARIABLE(dim_index);
    return false;
#endif
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool isRightPaddingCompileTimeZero(
      int dim_index) const {
#if defined(EIGEN_HAS_INDEX_LIST)
    return internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0);
#else
    EIGEN_UNUSED_VARIABLE(dim_index);
    return false;
#endif
  }


  void updateCostPerDimension(TensorOpCost& cost, int i, bool first) const {
    const double in = static_cast<double>(m_impl.dimensions()[i]);
    const double out = in + m_padding[i].first + m_padding[i].second;
    if (out == 0)
      return;
    const double reduction = in / out;
    cost *= reduction;
    if (first) {
      cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() +
                                 reduction * (1 * TensorOpCost::AddCost<Index>()));
    } else {
      cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() +
                                 2 * TensorOpCost::MulCost<Index>() +
                                 reduction * (2 * TensorOpCost::MulCost<Index>() +
                                              1 * TensorOpCost::DivCost<Index>()));
    }
  }

  protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    const Index initialIndex = index;
    Index inputIndex = 0;
    EIGEN_UNROLL_LOOP
    for (int i = NumDims - 1; i > 0; --i) {
      const Index firstIdx = index;
      const Index lastIdx = index + PacketSize - 1;
      const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i];
      const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
      const Index lastPaddedRight = m_outputStrides[i+1];

      if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
        // all the coefficients are between the 2 padding zones.
        const Index idx = index / m_outputStrides[i];
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      else {
        // Every other case
        return packetWithPossibleZero(initialIndex);
      }
    }

    const Index lastIdx = index + PacketSize - 1;
    const Index firstIdx = index;
    const Index lastPaddedLeft = m_padding[0].first;
    const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
    const Index lastPaddedRight = m_outputStrides[1];

    if (!isLeftPaddingCompileTimeZero(0) && lastIdx < lastPaddedLeft) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if (!isRightPaddingCompileTimeZero(0) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
      // all the coefficients are between the 2 padding zones.
      inputIndex += (index - m_padding[0].first);
      return m_impl.template packet<Unaligned>(inputIndex);
    }
    // Every other case
    return packetWithPossibleZero(initialIndex);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
  {
    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());

    const Index initialIndex = index;
    Index inputIndex = 0;
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < NumDims - 1; ++i) {
      const Index firstIdx = index;
      const Index lastIdx = index + PacketSize - 1;
      const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i+1];
      const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1];
      const Index lastPaddedRight = m_outputStrides[i];

      if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
        // all the coefficients are in the padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
        // all the coefficients are between the 2 padding zones.
        const Index idx = index / m_outputStrides[i+1];
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i+1];
      }
      else {
        // Every other case
        return packetWithPossibleZero(initialIndex);
      }
    }

    const Index lastIdx = index + PacketSize - 1;
    const Index firstIdx = index;
    const Index lastPaddedLeft = m_padding[NumDims-1].first;
    const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second);
    const Index lastPaddedRight = m_outputStrides[NumDims-1];

    if (!isLeftPaddingCompileTimeZero(NumDims-1) && lastIdx < lastPaddedLeft) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if (!isRightPaddingCompileTimeZero(NumDims-1) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
      // all the coefficients are in the padding zone.
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if ((isLeftPaddingCompileTimeZero(NumDims-1) && isRightPaddingCompileTimeZero(NumDims-1)) || (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
      // all the coefficients are between the 2 padding zones.
      inputIndex += (index - m_padding[NumDims-1].first);
      return m_impl.template packet<Unaligned>(inputIndex);
    }
    // Every other case
    return packetWithPossibleZero(initialIndex);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
  {
    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = coeff(index+i);
    }
    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    return rslt;
  }

  Dimensions m_dimensions;
  array<Index, NumDims+1> m_outputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  PaddingDimensions m_padding;

  Scalar m_paddingValue;

  const Device EIGEN_DEVICE_REF m_device;
};




}  // end namespace Eigen

#endif  // EIGEN_CXX11_TENSOR_TENSOR_PADDING_H