TR-mbed/TensorContraction_8h_source.html

// This file is part of Eigen, a lightweight C++ template library

// for linear algebra.

//

// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>

//

// This Source Code Form is subject to the terms of the Mozilla

// Public License v. 2.0. If a copy of the MPL was not distributed

// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.


#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H

#define EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H


namespace Eigen {


namespace internal {


template<typename Dimensions, typename LhsXprType, typename RhsXprType, typename OutputKernelType>


struct traits<TensorContractionOp<Dimensions, LhsXprType, RhsXprType, OutputKernelType> >

{

  // Type promotion to handle the case where the types of the lhs and the rhs are different.

  typedef typename gebp_traits<typename remove_const<typename LhsXprType::Scalar>::type,

                               typename remove_const<typename RhsXprType::Scalar>::type>::ResScalar Scalar;


  typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind,

                                        typename traits<RhsXprType>::StorageKind>::ret StorageKind;

  typedef typename promote_index_type<typename traits<LhsXprType>::Index,

                                      typename traits<RhsXprType>::Index>::type Index;

  typedef typename LhsXprType::Nested LhsNested;

  typedef typename RhsXprType::Nested RhsNested;

  typedef typename remove_reference<LhsNested>::type _LhsNested;

  typedef typename remove_reference<RhsNested>::type _RhsNested;


  // From NumDims below.

  static const int NumDimensions = traits<LhsXprType>::NumDimensions + traits<RhsXprType>::NumDimensions - 2 * array_size<Dimensions>::value;

  static const int Layout = traits<LhsXprType>::Layout;

  typedef typename conditional<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,

                               typename traits<LhsXprType>::PointerType,

                               typename traits<RhsXprType>::PointerType>::type

      PointerType;


  enum {

    Flags = 0

  };

};


template<typename Dimensions, typename LhsXprType, typename RhsXprType, typename OutputKernelType>


struct eval<TensorContractionOp<Dimensions, LhsXprType, RhsXprType, OutputKernelType>, Eigen::Dense>

{

  typedef const TensorContractionOp<Dimensions, LhsXprType, RhsXprType, OutputKernelType>& type;

};


template<typename Dimensions, typename LhsXprType, typename RhsXprType, typename OutputKernelType>


struct nested<TensorContractionOp<Dimensions, LhsXprType, RhsXprType, OutputKernelType>, 1, typename eval<TensorContractionOp<Dimensions, LhsXprType, RhsXprType, OutputKernelType> >::type>

{

  typedef TensorContractionOp<Dimensions, LhsXprType, RhsXprType, OutputKernelType> type;

};


template<typename Indices_, typename LeftArgType_, typename RightArgType_, typename OutputKernelType_, typename Device_>


struct traits<TensorEvaluator<const TensorContractionOp<Indices_, LeftArgType_, RightArgType_, OutputKernelType_>, Device_> > {

  typedef Indices_ Indices;

  typedef LeftArgType_ LeftArgType;

  typedef RightArgType_ RightArgType;

  typedef OutputKernelType_ OutputKernelType;

  typedef Device_ Device;


  // From NumDims below.

  static const int NumDimensions = traits<LeftArgType_>::NumDimensions + traits<RightArgType_>::NumDimensions - 2 * array_size<Indices_>::value;

};


// Helper class to allocate and deallocate temporary memory for packed buffers.

template <typename LhsScalar, typename RhsScalar>


struct TensorContractionBlockMemAllocator {

  typedef void* BlockMemHandle;


  template <typename Device>


  EIGEN_DEVICE_FUNC static BlockMemHandle allocate(Device& d, const Index bm,

                                                   const Index bk,

                                                   const Index bn,

                                                   LhsScalar** lhs_block,

                                                   RhsScalar** rhs_block) {

    eigen_assert(lhs_block);

    eigen_assert(rhs_block);

    BlockSizes sz = ComputeLhsRhsBlockSizes(bm, bk, bn);

    char* block_mem = static_cast<char*>(d.allocate(sz.lhs_size + sz.rhs_size));

    eigen_assert(block_mem);

    *lhs_block = reinterpret_cast<LhsScalar*>(block_mem);

    *rhs_block = reinterpret_cast<RhsScalar*>(block_mem + sz.lhs_size);

    return block_mem;

  }


  template <typename Device>


  EIGEN_DEVICE_FUNC static BlockMemHandle allocateSlices(

      Device& d, const Index bm, const Index bk, const Index bn,

      const Index num_lhs, const Index num_rhs, const Index num_slices,

      std::vector<LhsScalar*>* lhs_blocks,

      std::vector<RhsScalar*>* rhs_blocks) {

    eigen_assert(num_slices > 0);

    eigen_assert(num_lhs >= 0 && num_rhs >= 0);

    eigen_assert(num_lhs == 0 || lhs_blocks);

    eigen_assert(num_rhs == 0 || rhs_blocks);

    BlockSizes sz = ComputeLhsRhsBlockSizes(bm, bk, bn);

    void* block_mem = d.allocate(

        (num_lhs * sz.lhs_size + num_rhs * sz.rhs_size) * num_slices);

    eigen_assert(block_mem);

    char* mem = static_cast<char*>(block_mem);


    for (Index x = 0; x < num_slices; x++) {

      if (num_lhs > 0) lhs_blocks[x].resize(num_lhs);

      for (Index m = 0; m < num_lhs; m++) {

        lhs_blocks[x][m] = reinterpret_cast<LhsScalar*>(mem);

        mem += sz.lhs_size;

      }

      if (num_rhs > 0) rhs_blocks[x].resize(num_rhs);

      for (Index n = 0; n < num_rhs; n++) {

        rhs_blocks[x][n] = reinterpret_cast<RhsScalar*>(mem);

        mem += sz.rhs_size;

      }

    }


    return block_mem;

  }


  template <typename Device>


  EIGEN_DEVICE_FUNC static void deallocate(Device& d, BlockMemHandle handle) {

    d.deallocate(handle);

  }


 private:

  struct BlockSizes {

    Index lhs_size;

    Index rhs_size;

  };

  EIGEN_DEVICE_FUNC static BlockSizes ComputeLhsRhsBlockSizes(const Index bm,

                                                              const Index bk,

                                                              const Index bn) {

    Index align = numext::maxi(EIGEN_MAX_ALIGN_BYTES, 1);

    BlockSizes sz;

    sz.lhs_size = divup<Index>(bm * bk * sizeof(LhsScalar), align) * align;

    sz.rhs_size = divup<Index>(bn * bk * sizeof(RhsScalar), align) * align;

    return sz;

  }

};


// WARNING: In this code we assume that Lhs and Rhs tensor expressions are in

// ColMajor storage order. This property is guaranteed by the

// TensorContractionOp evaluator. TensorContractionKernel specifies how we pack

// blocks of Lhs and Rhs tensor expressions, and how we invoke matrix

// multiplication for these blocks. Default tensor contraction uses

// gemm_pack_rhs, gemm_pack_lhs and gebp_kernel from Eigen Core (see

// GeneralBlocPanelKernel.h for details).

//

// By specializing contraction kernels we can use other low level libraries to

// perform matrix multiplication, and still rely on Eigen contraction evaluator.

// This also includes full support in TensorContractionThreadPool, assuming that

// underlying gemm do not use it's own threading.

//

// - ResScalar/LhsScalar/RhsScalar - scalar type for the result of

//   multiplication, lhs tensor and rhs tensor respectively.

//

// - StorageIndex - index type for the tensor expressions. In practice almost

//   always is Eigen::Index.

//

// - OutputMapper provides access to the memory of the output matrix. In

//   practice it's always column major blas_data_mapper (it must be of ResScalar

//   type).

//

// - LhsMapper/RhsMapper similarly to blas_data_mapper provide a two dimensional

//   view into the Lhs/Rhs tensor expressions. In practice it's

//   TensorContractionInputMapper, or some specialization of it based on the

//   type of tensor expression (e.g. TensorImagePatchOp has optimized input

//   mapper).

template <typename ResScalar, typename LhsScalar, typename RhsScalar,

    typename StorageIndex, typename OutputMapper, typename LhsMapper,

    typename RhsMapper>


struct TensorContractionKernel {

  // True if `invoke()` supports `beta` in `C <- alpha * A * B + beta * C`

  // (otherwise beta should be always equal to 1).

  enum { HasBeta = false };


  EIGEN_DEVICE_FUNC


  TensorContractionKernel(StorageIndex m_, StorageIndex k_, StorageIndex n_,

                          StorageIndex bm_, StorageIndex bk_, StorageIndex bn_)

      : m(m_), k(k_), n(n_), bm(bm_), bk(bk_), bn(bn_) {}


  // Pack blocks of Lhs and Rhs into contiguous blocks in memory.

  typedef LhsScalar* LhsBlock;

  typedef RhsScalar* RhsBlock;


  // Packed Lhs/Rhs block memory allocator.

  typedef TensorContractionBlockMemAllocator<LhsScalar, RhsScalar>

      BlockMemAllocator;

  typedef typename BlockMemAllocator::BlockMemHandle BlockMemHandle;


  typedef typename internal::gebp_traits<LhsScalar, RhsScalar> Traits;


  typedef internal::gemm_pack_lhs<

      LhsScalar, StorageIndex, typename LhsMapper::SubMapper, Traits::mr,

      Traits::LhsProgress, typename Traits::LhsPacket4Packing, ColMajor>

      LhsPacker;


  typedef internal::gemm_pack_rhs<RhsScalar, StorageIndex,

                                  typename RhsMapper::SubMapper, Traits::nr,

                                  ColMajor>

      RhsPacker;


  typedef internal::gebp_kernel<LhsScalar, RhsScalar, StorageIndex,

                                OutputMapper, Traits::mr, Traits::nr,

      /*ConjugateLhs*/ false, /*ConjugateRhs*/ false>

      GebpKernel;


  template <typename Device>


  EIGEN_DEVICE_FUNC BlockMemHandle allocate(Device& d, LhsBlock* lhs_block,

                                            RhsBlock* rhs_block) {

    return BlockMemAllocator::allocate(d, bm, bk, bn, lhs_block, rhs_block);

  }


  template <typename Device>


  EIGEN_DEVICE_FUNC BlockMemHandle allocateSlices(

      Device& d, const StorageIndex num_lhs, const StorageIndex num_rhs,

      const StorageIndex num_slices, std::vector<LhsBlock>* lhs_blocks,

      std::vector<RhsBlock>* rhs_blocks) {

    return BlockMemAllocator::allocateSlices(

        d, bm, bk, bn, num_lhs, num_rhs, num_slices, lhs_blocks, rhs_blocks);

  }


  template <typename Device>


  EIGEN_DEVICE_FUNC static void deallocate(Device& d, BlockMemHandle handle) {

    BlockMemAllocator::deallocate(d, handle);

  }


  EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void packLhs(

      LhsBlock* lhsBlock, const typename LhsMapper::SubMapper& data_mapper,

      const StorageIndex depth, const StorageIndex rows) {

    LhsPacker()(*lhsBlock, data_mapper, depth, rows, /*stride*/ 0,

        /*offset*/ 0);

  }


  EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void packRhs(

      RhsBlock* rhsBlock, const typename RhsMapper::SubMapper& data_mapper,

      const StorageIndex depth, const StorageIndex cols) {

    RhsPacker()(*rhsBlock, data_mapper, depth, cols);

  }


  EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void invoke(

      const OutputMapper& output_mapper, const LhsBlock& lhsBlock,

      const RhsBlock& rhsBlock, const StorageIndex rows,

      const StorageIndex depth, const StorageIndex cols,

      const ResScalar alpha, const ResScalar beta) {

    // Default GEBP kernel does not support beta.

    eigen_assert(beta == ResScalar(1));

    static const int kComputeStrideFromBlockDimensions = -1;

    GebpKernel()(output_mapper, lhsBlock, rhsBlock, rows, depth, cols, alpha,

        /*strideA*/ kComputeStrideFromBlockDimensions,

        /*strideB*/ kComputeStrideFromBlockDimensions,

        /*offsetA*/ 0, /*offsetB*/ 0);

  }


 private:

  // These are dimensions of the original Tensors, and selected block sizes. The

  // actual block sizes passed to all function above might be smaller because of

  // the partial blocks at the end.

  const StorageIndex m;

  const StorageIndex k;

  const StorageIndex n;

  const StorageIndex bm;

  const StorageIndex bk;

  const StorageIndex bn;

};


}  // end namespace internal


// Tensor contraction params that should enable to get from output matrix

// 2-dimensional coordinates to the output tensor dimensions.


struct TensorContractionParams {

  // TensorContraction evaluator assumes that both tensors are in ColMajor

  // layout, if tensors are in RowMajor evaluator swap lhs with rhs.

  bool swapped_arguments;

};


// Output kernel allows to fuse operations into the tensor contraction.

//

// Examples:

//   1. Elementwise Relu transformation following Conv2D.

//   2. AddBias to the Conv2D output channels dimension.

//

// The NoOpOutputKernel implements an output kernel that does absolutely nothing.


struct NoOpOutputKernel {

  template <typename Index, typename Scalar>


  EIGEN_ALWAYS_INLINE void operator()(

      const internal::blas_data_mapper<Scalar, Index, ColMajor>& output_mapper,

      const TensorContractionParams& params, Index i,

      Index j, Index num_rows, Index num_cols) const {

    EIGEN_UNUSED_VARIABLE(output_mapper);

    EIGEN_UNUSED_VARIABLE(params);

    EIGEN_UNUSED_VARIABLE(i);

    EIGEN_UNUSED_VARIABLE(j);

    EIGEN_UNUSED_VARIABLE(num_rows);

    EIGEN_UNUSED_VARIABLE(num_cols);

  }


};


template<typename Indices, typename LhsXprType, typename RhsXprType, typename OutputKernelType = const NoOpOutputKernel>


class TensorContractionOp : public TensorBase<TensorContractionOp<Indices, LhsXprType, RhsXprType, OutputKernelType>, ReadOnlyAccessors>

{

  public:

  typedef typename Eigen::internal::traits<TensorContractionOp>::Scalar Scalar;

  typedef typename internal::gebp_traits<typename LhsXprType::CoeffReturnType,

                                         typename RhsXprType::CoeffReturnType>::ResScalar CoeffReturnType;

  typedef typename Eigen::internal::nested<TensorContractionOp>::type Nested;

  typedef typename Eigen::internal::traits<TensorContractionOp>::StorageKind StorageKind;

  typedef typename Eigen::internal::traits<TensorContractionOp>::Index Index;


  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionOp(

      const LhsXprType& lhs, const RhsXprType& rhs, const Indices& dims,

      const OutputKernelType& output_kernel = OutputKernelType())

      : m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_indices(dims),

        m_output_kernel(output_kernel) {}


  EIGEN_DEVICE_FUNC

  const Indices& indices() const { return m_indices; }


  EIGEN_DEVICE_FUNC

  const typename internal::remove_all<typename LhsXprType::Nested>::type&

  lhsExpression() const { return m_lhs_xpr; }


  EIGEN_DEVICE_FUNC

  const typename internal::remove_all<typename RhsXprType::Nested>::type&

  rhsExpression() const { return m_rhs_xpr; }


  EIGEN_DEVICE_FUNC

  const OutputKernelType& outputKernel() const { return m_output_kernel; }


  protected:

    typename LhsXprType::Nested m_lhs_xpr;

    typename RhsXprType::Nested m_rhs_xpr;

    const Indices m_indices;

    const OutputKernelType m_output_kernel;

};


template<typename Derived>


struct TensorContractionEvaluatorBase : internal::no_assignment_operator

{

  typedef typename internal::traits<Derived>::Indices Indices;

  typedef typename internal::traits<Derived>::LeftArgType LeftArgType;

  typedef typename internal::traits<Derived>::RightArgType RightArgType;

  typedef typename internal::traits<Derived>::OutputKernelType OutputKernelType;

  typedef typename internal::traits<Derived>::Device Device;


  typedef TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType> XprType;

  typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;

  typedef typename XprType::Index Index;

  typedef typename XprType::CoeffReturnType CoeffReturnType;

  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  typedef StorageMemory<Scalar, Device> Storage;

  typedef typename Storage::Type EvaluatorPointerType;


  enum {

    IsAligned         = true,

    PacketAccess      = (PacketType<CoeffReturnType, Device>::size > 1),

    BlockAccess       = false,

    PreferBlockAccess = false,

    Layout            = TensorEvaluator<LeftArgType, Device>::Layout,

    CoordAccess       = false,  // to be implemented

    RawAccess         = true

  };


  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//

  typedef internal::TensorBlockNotImplemented TensorBlock;

  //===--------------------------------------------------------------------===//


  // Most of the code is assuming that both input tensors are ColMajor. If the

  // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS:

  // If we want to compute A * B = C, where A is LHS and B is RHS, the code

  // will pretend B is LHS and A is RHS.

  typedef typename internal::conditional<

    static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType;

  typedef typename internal::conditional<

    static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType;


  typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluatorType;

  typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluatorType;


  static const int LDims =

      internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value;

  static const int RDims =

      internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value;

  static const int ContractDims = internal::array_size<Indices>::value;

  static const int NumDims = LDims + RDims - 2 * ContractDims;


  typedef array<Index, ContractDims> contract_t;

  typedef array<Index, LDims - ContractDims> left_nocontract_t;

  typedef array<Index, RDims - ContractDims> right_nocontract_t;


  typedef DSizes<Index, NumDims> Dimensions;


  EIGEN_STRONG_INLINE


  TensorContractionEvaluatorBase(const XprType& op, const Device& device)

      : m_leftImpl(choose(Cond<static_cast<int>(Layout) == static_cast<int>(ColMajor)>(),

                          op.lhsExpression(), op.rhsExpression()), device),

        m_rightImpl(choose(Cond<static_cast<int>(Layout) == static_cast<int>(ColMajor)>(),

                           op.rhsExpression(), op.lhsExpression()), device),

        m_device(device),

        m_output_kernel(op.outputKernel()),

        m_result(NULL) {

    EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) ==

         static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout)),

                        YOU_MADE_A_PROGRAMMING_MISTAKE);


    DSizes<Index, LDims> eval_left_dims;

    DSizes<Index, RDims> eval_right_dims;

    array<IndexPair<Index>, ContractDims> eval_op_indices;

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {

      // For ColMajor, we keep using the existing dimensions

      for (int i = 0; i < LDims; i++) {

        eval_left_dims[i] = m_leftImpl.dimensions()[i];

      }

      for (int i = 0; i < RDims; i++) {

        eval_right_dims[i] = m_rightImpl.dimensions()[i];

      }

      // We keep the pairs of contracting indices.

      for (int i = 0; i < ContractDims; i++) {

        eval_op_indices[i].first = op.indices()[i].first;

        eval_op_indices[i].second = op.indices()[i].second;

      }

    } else {

      // For RowMajor, we need to reverse the existing dimensions

      for (int i = 0; i < LDims; i++) {

        eval_left_dims[i] = m_leftImpl.dimensions()[LDims - i - 1];

      }

      for (int i = 0; i < RDims; i++) {

        eval_right_dims[i] = m_rightImpl.dimensions()[RDims - i - 1];

      }

      // We need to flip all the pairs of contracting indices as well as

      // reversing the dimensions.

      for (int i = 0; i < ContractDims; i++) {

        eval_op_indices[i].first = LDims - 1 - op.indices()[ContractDims - 1 - i].second;

        eval_op_indices[i].second = RDims - 1 - op.indices()[ContractDims - 1 - i].first;

      }

    }


    // Check for duplicate axes and make sure the first index in eval_op_indices

    // is increasing. Using O(n^2) sorting is OK since ContractDims is small

    for (int i = 0; i < ContractDims; i++) {

      for (int j = i + 1; j < ContractDims; j++) {

        eigen_assert(eval_op_indices[j].first != eval_op_indices[i].first &&

                     eval_op_indices[j].second != eval_op_indices[i].second &&

                     "contraction axes should be unique");

        if (eval_op_indices[j].first < eval_op_indices[i].first) {

          numext::swap(eval_op_indices[j], eval_op_indices[i]);

        }

      }

    }


    array<Index, LDims> lhs_strides;

    lhs_strides[0] = 1;

    for (int i = 0; i < LDims-1; ++i) {

      lhs_strides[i+1] = lhs_strides[i] * eval_left_dims[i];

    }


    array<Index, RDims> rhs_strides;

    rhs_strides[0] = 1;

    for (int i = 0; i < RDims-1; ++i) {

      rhs_strides[i+1] = rhs_strides[i] * eval_right_dims[i];

    }


    if (m_i_strides.size() > 0) m_i_strides[0] = 1;

    if (m_j_strides.size() > 0) m_j_strides[0] = 1;

    if (m_k_strides.size() > 0) m_k_strides[0] = 1;


    m_i_size = 1;

    m_j_size = 1;

    m_k_size = 1;


    // To compute the dimension, we simply concatenate the non-contracting

    // dimensions of the left and then the right tensor. Additionally, we also

    // compute the strides corresponding to the left non-contracting

    // dimensions and right non-contracting dimensions.

    m_lhs_inner_dim_contiguous = true;

    int dim_idx = 0;

    Index nocontract_idx = 0;


    for (int i = 0; i < LDims; i++) {

      // find if we are contracting on index i of left tensor

      bool contracting = false;

      for (int j = 0; j < ContractDims; j++) {

        if (eval_op_indices[j].first == i) {

          contracting = true;

          break;

        }

      }

      if (!contracting) {

        // add dimension size to output dimensions

        m_dimensions[dim_idx] = eval_left_dims[i];

        m_left_nocontract_strides[nocontract_idx] = lhs_strides[i];

        if (dim_idx != i) {

          m_lhs_inner_dim_contiguous = false;

        }

        if (nocontract_idx+1 < internal::array_size<left_nocontract_t>::value) {

          m_i_strides[nocontract_idx+1] =

              m_i_strides[nocontract_idx] * eval_left_dims[i];

        } else {

          m_i_size = m_i_strides[nocontract_idx] * eval_left_dims[i];

        }

        dim_idx++;

        nocontract_idx++;

      }

    }


    nocontract_idx = 0;

    for (int i = 0; i < RDims; i++) {

      bool contracting = false;

      // find if we are contracting on index i of right tensor

      for (int j = 0; j < ContractDims; j++) {

        if (eval_op_indices[j].second == i) {

          contracting = true;

          break;

        }

      }

      if (!contracting) {

        m_dimensions[dim_idx] = eval_right_dims[i];

        if (nocontract_idx+1 < internal::array_size<right_nocontract_t>::value) {

          m_j_strides[nocontract_idx+1] =

              m_j_strides[nocontract_idx] * eval_right_dims[i];

        } else {

          m_j_size = m_j_strides[nocontract_idx] * eval_right_dims[i];

        }

        m_right_nocontract_strides[nocontract_idx] = rhs_strides[i];

        dim_idx++;

        nocontract_idx++;

      }

    }


    // Now compute the strides corresponding to the contracting dimensions. We

    // assumed above that non-contracting axes are represented in the same order

    // in the matrix as they are in the tensor. This is not the case for

    // contracting axes. As the contracting axes must be of the same size in

    // each tensor, we'll only look at the first tensor here.

    m_rhs_inner_dim_contiguous = true;

    m_rhs_inner_dim_reordered = false;

    for (int i = 0; i < ContractDims; i++) {

      Index left = eval_op_indices[i].first;

      Index right = eval_op_indices[i].second;


      Index size = eval_left_dims[left];

      eigen_assert(size == eval_right_dims[right] &&

                   "Contraction axes must be same size");


      if (i+1 < static_cast<int>(internal::array_size<contract_t>::value)) {

        m_k_strides[i+1] = m_k_strides[i] * size;

      } else {

        m_k_size = m_k_strides[i] * size;

      }

      m_left_contracting_strides[i] = lhs_strides[left];

      m_right_contracting_strides[i] = rhs_strides[right];


      if (i > 0 && right < eval_op_indices[i-1].second) {

        m_rhs_inner_dim_reordered = true;

      }

      if (right != i) {

        m_rhs_inner_dim_contiguous = false;

      }

    }


    // If the layout is RowMajor, we need to reverse the m_dimensions

    if (static_cast<int>(Layout) == static_cast<int>(RowMajor)) {

      for (int i = 0, j = NumDims - 1; i < j; i++, j--) {

        numext::swap(m_dimensions[i], m_dimensions[j]);

      }

    }


    // A set of parameters that will allow output kernel to get from output

    // tensor dimensions (i, j) into the original tensor dimensions.

    // TODO(ezhulenev): Add parameters required to infer output tensor index for

    // more complex contractions than 2x2 on internal dimension.

    m_tensor_contraction_params.swapped_arguments = static_cast<int>(Layout) == RowMajor;

  }


  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }


  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {

    m_leftImpl.evalSubExprsIfNeeded(NULL);

    m_rightImpl.evalSubExprsIfNeeded(NULL);

    if (data) {

      evalTo(data);

      return false;

    } else {

      m_result = static_cast<EvaluatorPointerType>(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));

      evalTo(m_result);

      return true;

    }

  }


#ifdef EIGEN_USE_THREADS

  template <typename EvalSubExprsCallback>

  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(

      EvaluatorPointerType dest, EvalSubExprsCallback done) {

    m_leftImpl.evalSubExprsIfNeededAsync(nullptr, [this, done, dest](bool) {

      m_rightImpl.evalSubExprsIfNeededAsync(nullptr, [this, done, dest](bool) {

        if (dest) {

          evalToAsync(dest, [done]() { done(false); });

        } else {

          m_result = static_cast<EvaluatorPointerType>(

              m_device.allocate(dimensions().TotalSize() * sizeof(Scalar)));

          evalToAsync(m_result, [done]() { done(true); });

        }

      });

    });

  }

#endif  // EIGEN_USE_THREADS


#ifndef TENSOR_CONTRACTION_DISPATCH


#define TENSOR_CONTRACTION_DISPATCH(METHOD, ALIGNMENT, ARGS) \

  if (this->m_lhs_inner_dim_contiguous) {                    \

    if (this->m_rhs_inner_dim_contiguous) {                  \

      if (this->m_rhs_inner_dim_reordered) {                 \

        METHOD<true, true, true, ALIGNMENT> ARGS;            \

      } else {                                               \

        METHOD<true, true, false, ALIGNMENT> ARGS;           \

      }                                                      \

    } else {                                                 \

      if (this->m_rhs_inner_dim_reordered) {                 \

        METHOD<true, false, true, ALIGNMENT> ARGS;           \

      } else {                                               \

        METHOD<true, false, false, ALIGNMENT> ARGS;          \

      }                                                      \

    }                                                        \

  } else {                                                   \

    if (this->m_rhs_inner_dim_contiguous) {                  \

      if (this->m_rhs_inner_dim_reordered) {                 \

        METHOD<false, true, true, ALIGNMENT> ARGS;           \

      } else {                                               \

        METHOD<false, true, false, ALIGNMENT> ARGS;          \

      }                                                      \

    } else {                                                 \

      if (this->m_rhs_inner_dim_reordered) {                 \

        METHOD<false, false, true, ALIGNMENT> ARGS;          \

      } else {                                               \

        METHOD<false, false, false, ALIGNMENT> ARGS;         \

      }                                                      \

    }                                                        \

  }


#endif


#ifndef TENSOR_CONTRACTION_ASYNC_DISPATCH


#define TENSOR_CONTRACTION_ASYNC_DISPATCH(METHOD, DONE, ALIGNMENT, ARGS, FN) \

  if (this->m_lhs_inner_dim_contiguous) {                                    \

    if (this->m_rhs_inner_dim_contiguous) {                                  \

      if (this->m_rhs_inner_dim_reordered) {                                 \

        (new METHOD<DONE, true, true, true, ALIGNMENT> ARGS)->FN;            \

      } else {                                                               \

        (new METHOD<DONE, true, true, false, ALIGNMENT> ARGS)->FN;           \

      }                                                                      \

    } else {                                                                 \

      if (this->m_rhs_inner_dim_reordered) {                                 \

        (new METHOD<DONE, true, false, true, ALIGNMENT> ARGS)->FN;           \

      } else {                                                               \

        (new METHOD<DONE, true, false, false, ALIGNMENT> ARGS)->FN;          \

      }                                                                      \

    }                                                                        \

  } else {                                                                   \

    if (this->m_rhs_inner_dim_contiguous) {                                  \

      if (this->m_rhs_inner_dim_reordered) {                                 \

        (new METHOD<DONE, false, true, true, ALIGNMENT> ARGS)->FN;           \

      } else {                                                               \

        (new METHOD<DONE, false, true, false, ALIGNMENT> ARGS)->FN;          \

      }                                                                      \

    } else {                                                                 \

      if (this->m_rhs_inner_dim_reordered) {                                 \

        (new METHOD<DONE, false, false, true, ALIGNMENT> ARGS)->FN;          \

      } else {                                                               \

        (new METHOD<DONE, false, false, false, ALIGNMENT> ARGS)->FN;         \

      }                                                                      \

    }                                                                        \

  }


#endif


  EIGEN_DEVICE_FUNC void evalTo(Scalar* buffer) const {

   static_cast<const Derived*>(this)->template evalProduct<Unaligned>(buffer);

  }


#ifdef EIGEN_USE_THREADS

  template <typename EvalToCallback>

  void evalToAsync(Scalar* buffer, EvalToCallback done) const {

    static_cast<const Derived*>(this)

        ->template evalProductAsync<EvalToCallback, Unaligned>(buffer,

                                                               std::move(done));

  }

#endif  // EIGEN_USE_THREADS


  template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous,

            bool rhs_inner_dim_reordered, int Alignment>


  void evalProductSequential(Scalar* buffer) const {

    if (this->m_j_size == 1) {

      this->template evalGemv<lhs_inner_dim_contiguous,

                              rhs_inner_dim_contiguous, rhs_inner_dim_reordered,

                              Alignment>(buffer);

    } else {

      this->template evalGemm<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous,

                              rhs_inner_dim_reordered, Alignment>(buffer);

    }

  }


  template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>

  #if !defined(EIGEN_HIPCC)

  EIGEN_DEVICE_FUNC

  #endif


  void evalGemv(Scalar* buffer) const {

    const Index rows = m_i_size;

    const Index cols = m_k_size;


    typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar;

    typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar;

    typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator;

    typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator;

    const Index lhs_packet_size = internal::unpacket_traits<typename LeftEvaluator::PacketReturnType>::size;

    const Index rhs_packet_size = internal::unpacket_traits<typename RightEvaluator::PacketReturnType>::size;

    const int lhs_alignment = LeftEvaluator::IsAligned ? Aligned : Unaligned;

    const int rhs_alignment = RightEvaluator::IsAligned ? Aligned : Unaligned;

    typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs,

                                                   LeftEvaluator, left_nocontract_t,

                                                   contract_t, lhs_packet_size,

                                                   lhs_inner_dim_contiguous,

                                                   false, lhs_alignment> LhsMapper;


    typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs,

                                                   RightEvaluator, right_nocontract_t,

                                                   contract_t, rhs_packet_size,

                                                   rhs_inner_dim_contiguous,

                                                   rhs_inner_dim_reordered, rhs_alignment> RhsMapper;


    LhsMapper lhs(m_leftImpl, m_left_nocontract_strides, m_i_strides,

                  m_left_contracting_strides, m_k_strides);

    RhsMapper rhs(m_rightImpl, m_right_nocontract_strides, m_j_strides,

                  m_right_contracting_strides, m_k_strides);


    const Scalar alpha(1);

    const Index resIncr(1);


    // zero out the result buffer (which must be of size at least rows * sizeof(Scalar)

    m_device.memset(buffer, 0, rows * sizeof(Scalar));


    internal::general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,false,RhsScalar,RhsMapper,false>::run(

        rows, cols, lhs, rhs,

        buffer, resIncr, alpha);


    typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper;

    m_output_kernel(OutputMapper(buffer, rows), m_tensor_contraction_params,

                    static_cast<Index>(0), static_cast<Index>(0), rows,

                    static_cast<Index>(1));

  }


  template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>

  #if !defined(EIGEN_HIPCC)

  EIGEN_DEVICE_FUNC

  #endif


  void evalGemm(Scalar* buffer) const {

    // columns in left side, rows in right side

    const Index k = this->m_k_size;

    this->template evalGemmPartial<lhs_inner_dim_contiguous,

                                   rhs_inner_dim_contiguous,

                                   rhs_inner_dim_reordered,

                                   Alignment, true>(buffer, 0, k, 1);

  }


  template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous,

      bool rhs_inner_dim_reordered, int Alignment>


  EIGEN_DEVICE_FUNC void evalGemmPartialWithoutOutputKernel(

      Scalar* buffer, Index k_start, Index k_end, int num_threads) const {

    evalGemmPartial<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous,

                    rhs_inner_dim_reordered, Alignment,

        /*use_output_kernel*/ false>(buffer, k_start, k_end,

                                     num_threads);

  }


  template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment, bool use_output_kernel>


  EIGEN_DEVICE_FUNC void evalGemmPartial(Scalar* buffer, Index k_start, Index k_end, int num_threads) const {

    eigen_assert(k_end >= k_start && k_start >= 0 && k_end <= this->m_k_size);

    // columns in slice on left side, rows on right side

    const Index k_slice = k_end - k_start;


    // rows in left side

    const Index m = this->m_i_size;


    // columns in right side

    const Index n = this->m_j_size;


    // define data mappers for Lhs and Rhs

    typedef typename internal::remove_const<typename EvalLeftArgType::Scalar>::type LhsScalar;

    typedef typename internal::remove_const<typename EvalRightArgType::Scalar>::type RhsScalar;


    typedef TensorEvaluator<EvalLeftArgType, Device> LeftEvaluator;

    typedef TensorEvaluator<EvalRightArgType, Device> RightEvaluator;


    const Index lhs_packet_size = internal::unpacket_traits<typename LeftEvaluator::PacketReturnType>::size;

    const Index rhs_packet_size = internal::unpacket_traits<typename RightEvaluator::PacketReturnType>::size;


    typedef internal::TensorContractionInputMapper<LhsScalar, Index, internal::Lhs,

                                                   LeftEvaluator, left_nocontract_t,

                                                   contract_t, lhs_packet_size,

                                                   lhs_inner_dim_contiguous,

                                                   false, Unaligned> LhsMapper;


    typedef internal::TensorContractionInputMapper<RhsScalar, Index, internal::Rhs,

                                                   RightEvaluator, right_nocontract_t,

                                                   contract_t, rhs_packet_size,

                                                   rhs_inner_dim_contiguous,

                                                   rhs_inner_dim_reordered, Unaligned> RhsMapper;


    typedef internal::blas_data_mapper<Scalar, Index, ColMajor> OutputMapper;


    typedef internal::TensorContractionKernel<

        Scalar, LhsScalar, RhsScalar, Index, OutputMapper, LhsMapper, RhsMapper>

        TensorContractionKernel;


    // initialize data mappers

    LhsMapper lhs(this->m_leftImpl, this->m_left_nocontract_strides, this->m_i_strides,

                  this->m_left_contracting_strides, this->m_k_strides);


    RhsMapper rhs(this->m_rightImpl, this->m_right_nocontract_strides, this->m_j_strides,

                  this->m_right_contracting_strides, this->m_k_strides);


    OutputMapper output(buffer, m);


    // Sizes of the blocks to load in cache. See the Goto paper for details.

    internal::TensorContractionBlocking<Scalar, LhsScalar, RhsScalar,

                                        Index, internal::ShardByCol>

        blocking(k_slice, m, n, num_threads);

    const Index kc = blocking.kc();

    const Index mc = numext::mini(m, blocking.mc());

    const Index nc = numext::mini(n, blocking.nc());


    typedef typename TensorContractionKernel::LhsBlock LhsBlock;

    typedef typename TensorContractionKernel::RhsBlock RhsBlock;


    LhsBlock blockA;

    RhsBlock blockB;


    TensorContractionKernel kernel(m, k_slice, n, mc, kc, nc);


    typedef typename TensorContractionKernel::BlockMemHandle BlockMemHandle;

    const BlockMemHandle packed_mem =

        kernel.allocate(this->m_device, &blockA, &blockB);


    // If a contraction kernel does not support beta, explicitly initialize

    // output buffer with zeroes.

    if (!TensorContractionKernel::HasBeta) {

      this->m_device.memset(buffer, 0, m * n * sizeof(Scalar));

    }


    for(Index i2=0; i2<m; i2+=mc)

    {

      const Index actual_mc = numext::mini(i2+mc,m)-i2;

      for (Index k2 = k_start; k2 < k_end; k2 += kc) {

        // make sure we don't overshoot right edge of left matrix, then pack vertical panel

        const Index actual_kc = numext::mini(k2 + kc, k_end) - k2;

        kernel.packLhs(&blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);


        // If kernel supports beta, there is no need to initialize output

        // buffer with zeroes.

        const Scalar alpha = Scalar(1);

        const Scalar beta = (TensorContractionKernel::HasBeta && k2 == k_start)

                                ? Scalar(0)

                                : Scalar(1);


        // series of horizontal blocks

        for (Index j2 = 0; j2 < n; j2 += nc) {

          // make sure we don't overshoot right edge of right matrix, then pack block

          const Index actual_nc = numext::mini(j2 + nc, n) - j2;

          kernel.packRhs(&blockB, rhs.getSubMapper(k2, j2), actual_kc,

                         actual_nc);


          // call gebp (matrix kernel)

          // The parameters here are copied from Eigen's GEMM implementation

          const OutputMapper output_mapper = output.getSubMapper(i2, j2);

          kernel.invoke(output_mapper, blockA, blockB, actual_mc, actual_kc,

                        actual_nc, alpha, beta);


          // We are done with this [i2, j2] output block.

          if (use_output_kernel && k2 + kc >= k_end) {

            m_output_kernel(output_mapper, m_tensor_contraction_params, i2, j2,

                            actual_mc, actual_nc);

          }

        }

      }

    }


    kernel.deallocate(this->m_device, packed_mem);

  }


  EIGEN_STRONG_INLINE void cleanup() {

    m_leftImpl.cleanup();

    m_rightImpl.cleanup();


    if (m_result != NULL) {

      m_device.deallocate(m_result);

      m_result = NULL;

    }

  }


  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {

    return m_result[index];

  }


  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const {

    return TensorOpCost(sizeof(CoeffReturnType), 0, 0);

  }


  template<int LoadMode>


  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {

    return internal::ploadt<PacketReturnType, LoadMode>(m_result + index);

  }


  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const { return m_result; }


protected:

  Dimensions m_dimensions;


  contract_t m_k_strides;

  contract_t m_left_contracting_strides;

  contract_t m_right_contracting_strides;


  bool m_lhs_inner_dim_contiguous;

  bool m_rhs_inner_dim_contiguous;

  bool m_rhs_inner_dim_reordered;


  left_nocontract_t m_i_strides;

  right_nocontract_t m_j_strides;

  left_nocontract_t m_left_nocontract_strides;

  right_nocontract_t m_right_nocontract_strides;


  Index m_i_size;

  Index m_j_size;

  Index m_k_size;


  TensorContractionParams m_tensor_contraction_params;


  TensorEvaluator<EvalLeftArgType, Device> m_leftImpl;

  TensorEvaluator<EvalRightArgType, Device> m_rightImpl;

  const Device EIGEN_DEVICE_REF m_device;

  OutputKernelType m_output_kernel;

  EvaluatorPointerType m_result;

};


// evaluator for default device

template<typename Indices, typename LeftArgType, typename RightArgType, typename OutputKernelType, typename Device>


struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, Device> :

    public TensorContractionEvaluatorBase<

      TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, Device> > {

  typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, Device> Self;

  typedef TensorContractionEvaluatorBase<Self> Base;


  typedef TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType> XprType;

  typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;

  typedef typename XprType::Index Index;

  typedef typename XprType::CoeffReturnType CoeffReturnType;

  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;


  enum {

    Layout = TensorEvaluator<LeftArgType, Device>::Layout

  };


  // Most of the code is assuming that both input tensors are ColMajor. If the

  // inputs are RowMajor, we will "cheat" by swapping the LHS and RHS:

  // If we want to compute A * B = C, where A is LHS and B is RHS, the code

  // will pretend B is LHS and A is RHS.

  typedef typename internal::conditional<

    static_cast<int>(Layout) == static_cast<int>(ColMajor), LeftArgType, RightArgType>::type EvalLeftArgType;

  typedef typename internal::conditional<

    static_cast<int>(Layout) == static_cast<int>(ColMajor), RightArgType, LeftArgType>::type EvalRightArgType;


  static const int LDims =

      internal::array_size<typename TensorEvaluator<EvalLeftArgType, Device>::Dimensions>::value;

  static const int RDims =

      internal::array_size<typename TensorEvaluator<EvalRightArgType, Device>::Dimensions>::value;

  static const int ContractDims = internal::array_size<Indices>::value;


  typedef array<Index, ContractDims> contract_t;

  typedef array<Index, LDims - ContractDims> left_nocontract_t;

  typedef array<Index, RDims - ContractDims> right_nocontract_t;


  static const int NumDims = LDims + RDims - 2 * ContractDims;


  // Could we use NumDimensions here?

  typedef DSizes<Index, NumDims> Dimensions;


  TensorEvaluator(const XprType& op, const Device& device) :

      Base(op, device) { }


  template <int Alignment>


  void evalProduct(Scalar* buffer) const {

    TENSOR_CONTRACTION_DISPATCH(this->template evalProductSequential, Alignment, (buffer));

  }


};


} // end namespace Eigen


#endif // EIGEN_CXX11_TENSOR_TENSOR_CONTRACTION_H

m
Matrix3f m
Definition AngleAxis_mimic_euler.cpp:1

n
int n
Definition BiCGSTAB_simple.cpp:1

i
int i
Definition BiCGSTAB_step_by_step.cpp:9

EIGEN_MAX_ALIGN_BYTES
#define EIGEN_MAX_ALIGN_BYTES
Definition ConfigureVectorization.h:175

EIGEN_ALWAYS_INLINE
#define EIGEN_ALWAYS_INLINE
Definition Macros.h:932

EIGEN_UNUSED_VARIABLE
#define EIGEN_UNUSED_VARIABLE(var)
Definition Macros.h:1076

EIGEN_DEVICE_FUNC
#define EIGEN_DEVICE_FUNC
Definition Macros.h:976

EIGEN_DONT_INLINE
#define EIGEN_DONT_INLINE
Definition Macros.h:940

eigen_assert
#define eigen_assert(x)
Definition Macros.h:1037

EIGEN_STRONG_INLINE
#define EIGEN_STRONG_INLINE
Definition Macros.h:917

EIGEN_STATIC_ASSERT
#define EIGEN_STATIC_ASSERT(CONDITION, MSG)
Definition StaticAssert.h:127

TENSOR_CONTRACTION_DISPATCH
#define TENSOR_CONTRACTION_DISPATCH(METHOD, ALIGNMENT, ARGS)
Definition TensorContraction.h:637

EIGEN_DEVICE_REF
#define EIGEN_DEVICE_REF
Definition TensorMacros.h:50

rows
int rows
Definition Tutorial_commainit_02.cpp:1

cols
int cols
Definition Tutorial_commainit_02.cpp:1

size
Scalar Scalar int size
Definition benchVecAdd.cpp:17

Scalar
SCALAR Scalar
Definition bench_gemm.cpp:46

Eigen::TensorBase
The tensor base class.
Definition TensorBase.h:973

Eigen::TensorContractionOp
Definition TensorContraction.h:326

Eigen::TensorContractionOp::outputKernel
EIGEN_DEVICE_FUNC const OutputKernelType & outputKernel() const
Definition TensorContraction.h:354

Eigen::TensorContractionOp::m_output_kernel
const OutputKernelType m_output_kernel
Definition TensorContraction.h:360

Eigen::TensorContractionOp::Index
Eigen::internal::traits< TensorContractionOp >::Index Index
Definition TensorContraction.h:333

Eigen::TensorContractionOp::Nested
Eigen::internal::nested< TensorContractionOp >::type Nested
Definition TensorContraction.h:331

Eigen::TensorContractionOp::rhsExpression
EIGEN_DEVICE_FUNC const internal::remove_all< typenameRhsXprType::Nested >::type & rhsExpression() const
Definition TensorContraction.h:351

Eigen::TensorContractionOp::StorageKind
Eigen::internal::traits< TensorContractionOp >::StorageKind StorageKind
Definition TensorContraction.h:332

Eigen::TensorContractionOp::lhsExpression
EIGEN_DEVICE_FUNC const internal::remove_all< typenameLhsXprType::Nested >::type & lhsExpression() const
Definition TensorContraction.h:347

Eigen::TensorContractionOp::indices
EIGEN_DEVICE_FUNC const Indices & indices() const
Definition TensorContraction.h:342

Eigen::TensorContractionOp::Scalar
Eigen::internal::traits< TensorContractionOp >::Scalar Scalar
Definition TensorContraction.h:328

Eigen::TensorContractionOp::m_indices
const Indices m_indices
Definition TensorContraction.h:359

Eigen::TensorContractionOp::m_lhs_xpr
LhsXprType::Nested m_lhs_xpr
Definition TensorContraction.h:357

Eigen::TensorContractionOp::m_rhs_xpr
RhsXprType::Nested m_rhs_xpr
Definition TensorContraction.h:358

Eigen::TensorContractionOp::CoeffReturnType
internal::gebp_traits< typenameLhsXprType::CoeffReturnType, typenameRhsXprType::CoeffReturnType >::ResScalar CoeffReturnType
Definition TensorContraction.h:330

Eigen::TensorContractionOp::TensorContractionOp
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionOp(const LhsXprType &lhs, const RhsXprType &rhs, const Indices &dims, const OutputKernelType &output_kernel=OutputKernelType())
Definition TensorContraction.h:335

Eigen::TensorOpCost
Definition TensorCostModel.h:25

Eigen::Triplet< double >

Eigen::array
Definition EmulateArray.h:21

Eigen::array::size
EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE std::size_t size()
Definition EmulateArray.h:44

Eigen::internal::TensorBlockNotImplemented
Definition TensorBlock.h:617

Eigen::internal::TensorContractionBlocking
Definition TensorContractionBlocking.h:25

Eigen::internal::TensorContractionInputMapper
Definition TensorContractionMapper.h:525

Eigen::internal::TensorLazyEvaluatorWritable
Definition TensorRef.h:81

Eigen::internal::gebp_traits
Definition GeneralBlockPanelKernel.h:419

Eigen::internal::no_assignment_operator
Definition XprHelper.h:110

x
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy x
Definition gnuplot_common_settings.hh:12

Eigen::Unaligned
@ Unaligned
Definition Constants.h:233

Eigen::Aligned
@ Aligned
Definition Constants.h:240

Eigen::ColMajor
@ ColMajor
Definition Constants.h:319

Eigen::RowMajor
@ RowMajor
Definition Constants.h:321

int
return int(ret)+1

alpha
RealScalar alpha
Definition level1_cplx_impl.h:147

ret
DenseIndex ret
Definition level1_cplx_impl.h:44

Eigen::internal::ShardByCol
@ ShardByCol
Definition TensorContractionBlocking.h:19

Eigen::internal::Lhs
@ Lhs
Definition TensorContractionMapper.h:19

Eigen::internal::Rhs
@ Rhs
Definition TensorContractionMapper.h:18

Eigen::numext::maxi
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T maxi(const T &x, const T &y)
Definition MathFunctions.h:1091

Eigen::numext::swap
EIGEN_STRONG_INLINE void swap(T &a, T &b)
Definition Meta.h:766

Eigen::numext::mini
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T mini(const T &x, const T &y)
Definition MathFunctions.h:1083

Eigen
Namespace containing all symbols from the Eigen library.
Definition bench_norm.cpp:85

Eigen::choose
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE const T1 & choose(Cond< true >, const T1 &first, const T2 &)
Definition TensorMeta.h:18

Eigen::Index
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition Meta.h:74

internal
Definition BandTriangularSolver.h:13

second
real function second()
SECOND returns nothing
Definition second_NONE.f:39

Eigen::Cond
Definition TensorMeta.h:15

Eigen::DSizes
Definition TensorDimensions.h:263

Eigen::DSizes::TotalSize
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex TotalSize() const
Definition TensorDimensions.h:271

Eigen::Dense
Definition Constants.h:507

Eigen::NoOpOutputKernel
Definition TensorContraction.h:294

Eigen::NoOpOutputKernel::operator()
EIGEN_ALWAYS_INLINE void operator()(const internal::blas_data_mapper< Scalar, Index, ColMajor > &output_mapper, const TensorContractionParams &params, Index i, Index j, Index num_rows, Index num_cols) const
Definition TensorContraction.h:311

Eigen::PacketType
Definition TensorMeta.h:50

Eigen::StorageMemory
Definition TensorForwardDeclarations.h:37

Eigen::TensorContractionEvaluatorBase
Definition TensorContraction.h:366

Eigen::TensorContractionEvaluatorBase::CoeffReturnType
XprType::CoeffReturnType CoeffReturnType
Definition TensorContraction.h:376

Eigen::TensorContractionEvaluatorBase::LDims
static const int LDims
Definition TensorContraction.h:407

Eigen::TensorContractionEvaluatorBase::RightEvaluatorType
TensorEvaluator< EvalRightArgType, Device > RightEvaluatorType
Definition TensorContraction.h:405

Eigen::TensorContractionEvaluatorBase::cleanup
EIGEN_STRONG_INLINE void cleanup()
Definition TensorContraction.h:915

Eigen::TensorContractionEvaluatorBase::m_i_size
Index m_i_size
Definition TensorContraction.h:956

Eigen::TensorContractionEvaluatorBase::Dimensions
DSizes< Index, NumDims > Dimensions
Definition TensorContraction.h:418

Eigen::TensorContractionEvaluatorBase::TensorContractionEvaluatorBase
EIGEN_STRONG_INLINE TensorContractionEvaluatorBase(const XprType &op, const Device &device)
Definition TensorContraction.h:421

Eigen::TensorContractionEvaluatorBase::Storage
StorageMemory< Scalar, Device > Storage
Definition TensorContraction.h:378

Eigen::TensorContractionEvaluatorBase::packet
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
Definition TensorContraction.h:934

Eigen::TensorContractionEvaluatorBase::m_j_strides
right_nocontract_t m_j_strides
Definition TensorContraction.h:952

Eigen::TensorContractionEvaluatorBase::Device
internal::traits< Derived >::Device Device
Definition TensorContraction.h:371

Eigen::TensorContractionEvaluatorBase::m_right_nocontract_strides
right_nocontract_t m_right_nocontract_strides
Definition TensorContraction.h:954

Eigen::TensorContractionEvaluatorBase::dimensions
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition TensorContraction.h:603

Eigen::TensorContractionEvaluatorBase::PacketReturnType
PacketType< CoeffReturnType, Device >::type PacketReturnType
Definition TensorContraction.h:377

Eigen::TensorContractionEvaluatorBase::LeftArgType
internal::traits< Derived >::LeftArgType LeftArgType
Definition TensorContraction.h:368

Eigen::TensorContractionEvaluatorBase::m_right_contracting_strides
contract_t m_right_contracting_strides
Definition TensorContraction.h:945

Eigen::TensorContractionEvaluatorBase::m_device
const Device EIGEN_DEVICE_REF m_device
Definition TensorContraction.h:964

Eigen::TensorContractionEvaluatorBase::right_nocontract_t
array< Index, RDims - ContractDims > right_nocontract_t
Definition TensorContraction.h:416

Eigen::TensorContractionEvaluatorBase::EvalLeftArgType
internal::conditional< static_cast< int >(Layout)==static_cast< int >(ColMajor), LeftArgType, RightArgType >::type EvalLeftArgType
Definition TensorContraction.h:400

Eigen::TensorContractionEvaluatorBase::LeftEvaluatorType
TensorEvaluator< EvalLeftArgType, Device > LeftEvaluatorType
Definition TensorContraction.h:404

Eigen::TensorContractionEvaluatorBase::m_left_nocontract_strides
left_nocontract_t m_left_nocontract_strides
Definition TensorContraction.h:953

Eigen::TensorContractionEvaluatorBase::m_result
EvaluatorPointerType m_result
Definition TensorContraction.h:966

Eigen::TensorContractionEvaluatorBase::evalGemv
EIGEN_DEVICE_FUNC void evalGemv(Scalar *buffer) const
Definition TensorContraction.h:732

Eigen::TensorContractionEvaluatorBase::Index
XprType::Index Index
Definition TensorContraction.h:375

Eigen::TensorContractionEvaluatorBase::EvaluatorPointerType
Storage::Type EvaluatorPointerType
Definition TensorContraction.h:379

Eigen::TensorContractionEvaluatorBase::m_rhs_inner_dim_reordered
bool m_rhs_inner_dim_reordered
Definition TensorContraction.h:949

Eigen::TensorContractionEvaluatorBase::RightArgType
internal::traits< Derived >::RightArgType RightArgType
Definition TensorContraction.h:369

Eigen::TensorContractionEvaluatorBase::m_rhs_inner_dim_contiguous
bool m_rhs_inner_dim_contiguous
Definition TensorContraction.h:948

Eigen::TensorContractionEvaluatorBase::evalGemmPartial
EIGEN_DEVICE_FUNC void evalGemmPartial(Scalar *buffer, Index k_start, Index k_end, int num_threads) const
Definition TensorContraction.h:801

Eigen::TensorContractionEvaluatorBase::m_left_contracting_strides
contract_t m_left_contracting_strides
Definition TensorContraction.h:944

Eigen::TensorContractionEvaluatorBase::evalSubExprsIfNeeded
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data)
Definition TensorContraction.h:605

Eigen::TensorContractionEvaluatorBase::XprType
TensorContractionOp< Indices, LeftArgType, RightArgType, OutputKernelType > XprType
Definition TensorContraction.h:373

Eigen::TensorContractionEvaluatorBase::evalTo
EIGEN_DEVICE_FUNC void evalTo(Scalar *buffer) const
Definition TensorContraction.h:702

Eigen::TensorContractionEvaluatorBase::m_j_size
Index m_j_size
Definition TensorContraction.h:957

Eigen::TensorContractionEvaluatorBase::m_rightImpl
TensorEvaluator< EvalRightArgType, Device > m_rightImpl
Definition TensorContraction.h:963

Eigen::TensorContractionEvaluatorBase::EvalRightArgType
internal::conditional< static_cast< int >(Layout)==static_cast< int >(ColMajor), RightArgType, LeftArgType >::type EvalRightArgType
Definition TensorContraction.h:402

Eigen::TensorContractionEvaluatorBase::NumDims
static const int NumDims
Definition TensorContraction.h:412

Eigen::TensorContractionEvaluatorBase::Scalar
internal::remove_const< typenameXprType::Scalar >::type Scalar
Definition TensorContraction.h:374

Eigen::TensorContractionEvaluatorBase::m_k_size
Index m_k_size
Definition TensorContraction.h:958

Eigen::TensorContractionEvaluatorBase::PreferBlockAccess
@ PreferBlockAccess
Definition TensorContraction.h:385

Eigen::TensorContractionEvaluatorBase::PacketAccess
@ PacketAccess
Definition TensorContraction.h:383

Eigen::TensorContractionEvaluatorBase::RawAccess
@ RawAccess
Definition TensorContraction.h:388

Eigen::TensorContractionEvaluatorBase::CoordAccess
@ CoordAccess
Definition TensorContraction.h:387

Eigen::TensorContractionEvaluatorBase::IsAligned
@ IsAligned
Definition TensorContraction.h:382

Eigen::TensorContractionEvaluatorBase::BlockAccess
@ BlockAccess
Definition TensorContraction.h:384

Eigen::TensorContractionEvaluatorBase::Layout
@ Layout
Definition TensorContraction.h:386

Eigen::TensorContractionEvaluatorBase::TensorBlock
internal::TensorBlockNotImplemented TensorBlock
Definition TensorContraction.h:392

Eigen::TensorContractionEvaluatorBase::evalProductSequential
void evalProductSequential(Scalar *buffer) const
Definition TensorContraction.h:717

Eigen::TensorContractionEvaluatorBase::m_output_kernel
OutputKernelType m_output_kernel
Definition TensorContraction.h:965

Eigen::TensorContractionEvaluatorBase::m_tensor_contraction_params
TensorContractionParams m_tensor_contraction_params
Definition TensorContraction.h:960

Eigen::TensorContractionEvaluatorBase::m_lhs_inner_dim_contiguous
bool m_lhs_inner_dim_contiguous
Definition TensorContraction.h:947

Eigen::TensorContractionEvaluatorBase::m_dimensions
Dimensions m_dimensions
Definition TensorContraction.h:941

Eigen::TensorContractionEvaluatorBase::costPerCoeff
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const
Definition TensorContraction.h:929

Eigen::TensorContractionEvaluatorBase::contract_t
array< Index, ContractDims > contract_t
Definition TensorContraction.h:414

Eigen::TensorContractionEvaluatorBase::OutputKernelType
internal::traits< Derived >::OutputKernelType OutputKernelType
Definition TensorContraction.h:370

Eigen::TensorContractionEvaluatorBase::m_leftImpl
TensorEvaluator< EvalLeftArgType, Device > m_leftImpl
Definition TensorContraction.h:962

Eigen::TensorContractionEvaluatorBase::left_nocontract_t
array< Index, LDims - ContractDims > left_nocontract_t
Definition TensorContraction.h:415

Eigen::TensorContractionEvaluatorBase::evalGemm
EIGEN_DEVICE_FUNC void evalGemm(Scalar *buffer) const
Definition TensorContraction.h:781

Eigen::TensorContractionEvaluatorBase::m_k_strides
contract_t m_k_strides
Definition TensorContraction.h:943

Eigen::TensorContractionEvaluatorBase::data
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvaluatorPointerType data() const
Definition TensorContraction.h:938

Eigen::TensorContractionEvaluatorBase::coeff
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
Definition TensorContraction.h:925

Eigen::TensorContractionEvaluatorBase::RDims
static const int RDims
Definition TensorContraction.h:409

Eigen::TensorContractionEvaluatorBase::evalGemmPartialWithoutOutputKernel
EIGEN_DEVICE_FUNC void evalGemmPartialWithoutOutputKernel(Scalar *buffer, Index k_start, Index k_end, int num_threads) const
Definition TensorContraction.h:792

Eigen::TensorContractionEvaluatorBase::ContractDims
static const int ContractDims
Definition TensorContraction.h:411

Eigen::TensorContractionEvaluatorBase::Indices
internal::traits< Derived >::Indices Indices
Definition TensorContraction.h:367

Eigen::TensorContractionEvaluatorBase::m_i_strides
left_nocontract_t m_i_strides
Definition TensorContraction.h:951

Eigen::TensorContractionParams
Definition TensorContraction.h:281

Eigen::TensorContractionParams::swapped_arguments
bool swapped_arguments
Definition TensorContraction.h:284

Eigen::TensorEvaluator< const TensorContractionOp< Indices, LeftArgType, RightArgType, OutputKernelType >, Device >::EvalLeftArgType
internal::conditional< static_cast< int >(Layout)==static_cast< int >(ColMajor), LeftArgType, RightArgType >::type EvalLeftArgType
Definition TensorContraction.h:993

Eigen::TensorEvaluator< const TensorContractionOp< Indices, LeftArgType, RightArgType, OutputKernelType >, Device >::EvalRightArgType
internal::conditional< static_cast< int >(Layout)==static_cast< int >(ColMajor), RightArgType, LeftArgType >::type EvalRightArgType
Definition TensorContraction.h:995

Eigen::TensorEvaluator< const TensorContractionOp< Indices, LeftArgType, RightArgType, OutputKernelType >, Device >::XprType
TensorContractionOp< Indices, LeftArgType, RightArgType, OutputKernelType > XprType
Definition TensorContraction.h:978

Eigen::TensorEvaluator< const TensorContractionOp< Indices, LeftArgType, RightArgType, OutputKernelType >, Device >::Index
XprType::Index Index
Definition TensorContraction.h:980

Eigen::TensorEvaluator< const TensorContractionOp< Indices, LeftArgType, RightArgType, OutputKernelType >, Device >::PacketReturnType
PacketType< CoeffReturnType, Device >::type PacketReturnType
Definition TensorContraction.h:982

Eigen::TensorEvaluator< const TensorContractionOp< Indices, LeftArgType, RightArgType, OutputKernelType >, Device >::Self
TensorEvaluator< const TensorContractionOp< Indices, LeftArgType, RightArgType, OutputKernelType >, Device > Self
Definition TensorContraction.h:975

Eigen::TensorEvaluator< const TensorContractionOp< Indices, LeftArgType, RightArgType, OutputKernelType >, Device >::Dimensions
DSizes< Index, NumDims > Dimensions
Definition TensorContraction.h:1010

Eigen::TensorEvaluator< const TensorContractionOp< Indices, LeftArgType, RightArgType, OutputKernelType >, Device >::left_nocontract_t
array< Index, LDims - ContractDims > left_nocontract_t
Definition TensorContraction.h:1004

Eigen::TensorEvaluator< const TensorContractionOp< Indices, LeftArgType, RightArgType, OutputKernelType >, Device >::TensorEvaluator
TensorEvaluator(const XprType &op, const Device &device)
Definition TensorContraction.h:1012

Eigen::TensorEvaluator< const TensorContractionOp< Indices, LeftArgType, RightArgType, OutputKernelType >, Device >::CoeffReturnType
XprType::CoeffReturnType CoeffReturnType
Definition TensorContraction.h:981

Eigen::TensorEvaluator< const TensorContractionOp< Indices, LeftArgType, RightArgType, OutputKernelType >, Device >::Base
TensorContractionEvaluatorBase< Self > Base
Definition TensorContraction.h:976

Eigen::TensorEvaluator< const TensorContractionOp< Indices, LeftArgType, RightArgType, OutputKernelType >, Device >::Scalar
internal::remove_const< typenameXprType::Scalar >::type Scalar
Definition TensorContraction.h:979

Eigen::TensorEvaluator< const TensorContractionOp< Indices, LeftArgType, RightArgType, OutputKernelType >, Device >::evalProduct
void evalProduct(Scalar *buffer) const
Definition TensorContraction.h:1016

Eigen::TensorEvaluator< const TensorContractionOp< Indices, LeftArgType, RightArgType, OutputKernelType >, Device >::contract_t
array< Index, ContractDims > contract_t
Definition TensorContraction.h:1003

Eigen::TensorEvaluator< const TensorContractionOp< Indices, LeftArgType, RightArgType, OutputKernelType >, Device >::right_nocontract_t
array< Index, RDims - ContractDims > right_nocontract_t
Definition TensorContraction.h:1005

Eigen::TensorEvaluator
A cost model used to limit the number of threads used for evaluating tensor expression.
Definition TensorEvaluator.h:29

Eigen::TensorEvaluator::evalSubExprsIfNeeded
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType dest)
Definition TensorEvaluator.h:75

Eigen::TensorEvaluator::dimensions
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition TensorEvaluator.h:73

Eigen::TensorEvaluator::cleanup
EIGEN_STRONG_INLINE void cleanup()
Definition TensorEvaluator.h:92

Eigen::TensorEvaluator::Layout
@ Layout
Definition TensorEvaluator.h:50

Eigen::TensorEvaluator::Index
Derived::Index Index
Definition TensorEvaluator.h:30

Eigen::internal::TensorContractionBlockMemAllocator
Definition TensorContraction.h:79

Eigen::internal::TensorContractionBlockMemAllocator::allocate
static EIGEN_DEVICE_FUNC BlockMemHandle allocate(Device &d, const Index bm, const Index bk, const Index bn, LhsScalar **lhs_block, RhsScalar **rhs_block)
Definition TensorContraction.h:83

Eigen::internal::TensorContractionBlockMemAllocator::deallocate
static EIGEN_DEVICE_FUNC void deallocate(Device &d, BlockMemHandle handle)
Definition TensorContraction.h:131

Eigen::internal::TensorContractionBlockMemAllocator::BlockMemHandle
void * BlockMemHandle
Definition TensorContraction.h:80

Eigen::internal::TensorContractionBlockMemAllocator::allocateSlices
static EIGEN_DEVICE_FUNC BlockMemHandle allocateSlices(Device &d, const Index bm, const Index bk, const Index bn, const Index num_lhs, const Index num_rhs, const Index num_slices, std::vector< LhsScalar * > *lhs_blocks, std::vector< RhsScalar * > *rhs_blocks)
Definition TensorContraction.h:99

Eigen::internal::TensorContractionKernel
Definition TensorContraction.h:182

Eigen::internal::TensorContractionKernel::LhsBlock
LhsScalar * LhsBlock
Definition TensorContraction.h:193

Eigen::internal::TensorContractionKernel::RhsBlock
RhsScalar * RhsBlock
Definition TensorContraction.h:194

Eigen::internal::TensorContractionKernel::allocateSlices
EIGEN_DEVICE_FUNC BlockMemHandle allocateSlices(Device &d, const StorageIndex num_lhs, const StorageIndex num_rhs, const StorageIndex num_slices, std::vector< LhsBlock > *lhs_blocks, std::vector< RhsBlock > *rhs_blocks)
Definition TensorContraction.h:225

Eigen::internal::TensorContractionKernel::TensorContractionKernel
EIGEN_DEVICE_FUNC TensorContractionKernel(StorageIndex m_, StorageIndex k_, StorageIndex n_, StorageIndex bm_, StorageIndex bk_, StorageIndex bn_)
Definition TensorContraction.h:188

Eigen::internal::TensorContractionKernel::packLhs
EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void packLhs(LhsBlock *lhsBlock, const typename LhsMapper::SubMapper &data_mapper, const StorageIndex depth, const StorageIndex rows)
Definition TensorContraction.h:238

Eigen::internal::TensorContractionKernel::GebpKernel
internal::gebp_kernel< LhsScalar, RhsScalar, StorageIndex, OutputMapper, Traits::mr, Traits::nr, false, false > GebpKernel
Definition TensorContraction.h:216

Eigen::internal::TensorContractionKernel::LhsPacker
internal::gemm_pack_lhs< LhsScalar, StorageIndex, typename LhsMapper::SubMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, ColMajor > LhsPacker
Definition TensorContraction.h:206

Eigen::internal::TensorContractionKernel::HasBeta
@ HasBeta
Definition TensorContraction.h:185

Eigen::internal::TensorContractionKernel::RhsPacker
internal::gemm_pack_rhs< RhsScalar, StorageIndex, typename RhsMapper::SubMapper, Traits::nr, ColMajor > RhsPacker
Definition TensorContraction.h:211

Eigen::internal::TensorContractionKernel::invoke
EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void invoke(const OutputMapper &output_mapper, const LhsBlock &lhsBlock, const RhsBlock &rhsBlock, const StorageIndex rows, const StorageIndex depth, const StorageIndex cols, const ResScalar alpha, const ResScalar beta)
Definition TensorContraction.h:251

Eigen::internal::TensorContractionKernel::deallocate
static EIGEN_DEVICE_FUNC void deallocate(Device &d, BlockMemHandle handle)
Definition TensorContraction.h:234

Eigen::internal::TensorContractionKernel::BlockMemHandle
BlockMemAllocator::BlockMemHandle BlockMemHandle
Definition TensorContraction.h:199

Eigen::internal::TensorContractionKernel::Traits
internal::gebp_traits< LhsScalar, RhsScalar > Traits
Definition TensorContraction.h:201

Eigen::internal::TensorContractionKernel::allocate
EIGEN_DEVICE_FUNC BlockMemHandle allocate(Device &d, LhsBlock *lhs_block, RhsBlock *rhs_block)
Definition TensorContraction.h:219

Eigen::internal::TensorContractionKernel::BlockMemAllocator
TensorContractionBlockMemAllocator< LhsScalar, RhsScalar > BlockMemAllocator
Definition TensorContraction.h:198

Eigen::internal::TensorContractionKernel::packRhs
EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void packRhs(RhsBlock *rhsBlock, const typename RhsMapper::SubMapper &data_mapper, const StorageIndex depth, const StorageIndex cols)
Definition TensorContraction.h:245

Eigen::internal::array_size
Definition Meta.h:445

Eigen::internal::conditional
Definition Meta.h:109

Eigen::internal::eval< TensorContractionOp< Dimensions, LhsXprType, RhsXprType, OutputKernelType >, Eigen::Dense >::type
const TensorContractionOp< Dimensions, LhsXprType, RhsXprType, OutputKernelType > & type
Definition TensorContraction.h:56

Eigen::internal::eval
Definition XprHelper.h:332

Eigen::internal::gebp_kernel
Definition GeneralBlockPanelKernel.h:1058

Eigen::internal::gemm_pack_lhs
Definition BlasUtil.h:28

Eigen::internal::gemm_pack_rhs
Definition BlasUtil.h:25

Eigen::internal::general_matrix_vector_product
Definition BlasUtil.h:40

Eigen::internal::nested< TensorContractionOp< Dimensions, LhsXprType, RhsXprType, OutputKernelType >, 1, typename eval< TensorContractionOp< Dimensions, LhsXprType, RhsXprType, OutputKernelType > >::type >::type
TensorContractionOp< Dimensions, LhsXprType, RhsXprType, OutputKernelType > type
Definition TensorContraction.h:62

Eigen::internal::nested
Definition TensorTraits.h:175

Eigen::internal::traits< TensorContractionOp< Dimensions, LhsXprType, RhsXprType, OutputKernelType > >::StorageKind
promote_storage_type< typenametraits< LhsXprType >::StorageKind, typenametraits< RhsXprType >::StorageKind >::ret StorageKind
Definition TensorContraction.h:32

Eigen::internal::traits< TensorContractionOp< Dimensions, LhsXprType, RhsXprType, OutputKernelType > >::_LhsNested
remove_reference< LhsNested >::type _LhsNested
Definition TensorContraction.h:37

Eigen::internal::traits< TensorContractionOp< Dimensions, LhsXprType, RhsXprType, OutputKernelType > >::RhsNested
RhsXprType::Nested RhsNested
Definition TensorContraction.h:36

Eigen::internal::traits< TensorContractionOp< Dimensions, LhsXprType, RhsXprType, OutputKernelType > >::Scalar
gebp_traits< typenameremove_const< typenameLhsXprType::Scalar >::type, typenameremove_const< typenameRhsXprType::Scalar >::type >::ResScalar Scalar
Definition TensorContraction.h:29

Eigen::internal::traits< TensorContractionOp< Dimensions, LhsXprType, RhsXprType, OutputKernelType > >::_RhsNested
remove_reference< RhsNested >::type _RhsNested
Definition TensorContraction.h:38

Eigen::internal::traits< TensorContractionOp< Dimensions, LhsXprType, RhsXprType, OutputKernelType > >::LhsNested
LhsXprType::Nested LhsNested
Definition TensorContraction.h:35

Eigen::internal::traits< TensorContractionOp< Dimensions, LhsXprType, RhsXprType, OutputKernelType > >::PointerType
conditional< Pointer_type_promotion< typenameLhsXprType::Scalar, Scalar >::val, typenametraits< LhsXprType >::PointerType, typenametraits< RhsXprType >::PointerType >::type PointerType
Definition TensorContraction.h:46

Eigen::internal::traits< TensorContractionOp< Dimensions, LhsXprType, RhsXprType, OutputKernelType > >::Index
promote_index_type< typenametraits< LhsXprType >::Index, typenametraits< RhsXprType >::Index >::type Index
Definition TensorContraction.h:34

Eigen::internal::traits< TensorEvaluator< const TensorContractionOp< Indices_, LeftArgType_, RightArgType_, OutputKernelType_ >, Device_ > >::Device
Device_ Device
Definition TensorContraction.h:71

Eigen::internal::traits< TensorEvaluator< const TensorContractionOp< Indices_, LeftArgType_, RightArgType_, OutputKernelType_ >, Device_ > >::RightArgType
RightArgType_ RightArgType
Definition TensorContraction.h:69

Eigen::internal::traits< TensorEvaluator< const TensorContractionOp< Indices_, LeftArgType_, RightArgType_, OutputKernelType_ >, Device_ > >::LeftArgType
LeftArgType_ LeftArgType
Definition TensorContraction.h:68

Eigen::internal::traits< TensorEvaluator< const TensorContractionOp< Indices_, LeftArgType_, RightArgType_, OutputKernelType_ >, Device_ > >::Indices
Indices_ Indices
Definition TensorContraction.h:67

Eigen::internal::traits< TensorEvaluator< const TensorContractionOp< Indices_, LeftArgType_, RightArgType_, OutputKernelType_ >, Device_ > >::OutputKernelType
OutputKernelType_ OutputKernelType
Definition TensorContraction.h:70

Eigen::internal::traits
Definition ForwardDeclarations.h:17

Eigen::internal::unpacket_traits
Definition GenericPacketMath.h:133

j
std::ptrdiff_t j
Definition tut_arithmetic_redux_minmax.cpp:2