10#ifndef EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H
11#define EIGEN_CXX11_TENSOR_TENSOR_COST_MODEL_H
30 template <
typename ArgType>
35 template <
typename ArgType>
39 template <
typename ArgType>
44 template <
typename ArgType>
48 template <
typename SrcType,
typename TargetType>
55 TensorOpCost() : bytes_loaded_(0), bytes_stored_(0), compute_cycles_(0) {}
64 bool vectorized,
double packet_size)
81 return compute_cycles_;
84 double load_cost,
double store_cost,
double compute_cost)
const {
85 return load_cost * bytes_loaded_ + store_cost * bytes_stored_ +
86 compute_cost * compute_cycles_;
123 bytes_loaded_ *= rhs;
124 bytes_stored_ *= rhs;
125 compute_cycles_ *= rhs;
152 double bytes_loaded_;
153 double bytes_stored_;
154 double compute_cycles_;
160template <
typename Device>
175 double output_size,
const TensorOpCost& cost_per_coeff,
int max_threads) {
176 double cost =
totalCost(output_size, cost_per_coeff);
181 numext::maxi<int>(1,
static_cast<int>(threads)));
188 double output_size,
const TensorOpCost& cost_per_coeff) {
193 double output_size,
const TensorOpCost& cost_per_coeff) {
203 const double kLoadCycles = 1.0 / 64 * 11;
204 const double kStoreCycles = 1.0 / 64 * 11;
207 cost_per_coeff.
total_cost(kLoadCycles, kStoreCycles,
#define EIGEN_DEVICE_FUNC
Definition Macros.h:976
#define eigen_assert(x)
Definition Macros.h:1037
#define EIGEN_STRONG_INLINE
Definition Macros.h:917
Definition TensorCostModel.h:161
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int numThreads(double output_size, const TensorOpCost &cost_per_coeff, int max_threads)
Definition TensorCostModel.h:174
static const int kDeviceCyclesPerComputeCycle
Definition TensorCostModel.h:164
static const int kPerThreadCycles
Definition TensorCostModel.h:168
static const int kStartupCycles
Definition TensorCostModel.h:167
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double taskSize(double output_size, const TensorOpCost &cost_per_coeff)
Definition TensorCostModel.h:187
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double totalCost(double output_size, const TensorOpCost &cost_per_coeff)
Definition TensorCostModel.h:192
static const int kTaskSize
Definition TensorCostModel.h:169
Definition TensorCostModel.h:25
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMin(const TensorOpCost &rhs) const
Definition TensorCostModel.h:97
EIGEN_DEVICE_FUNC TensorOpCost(double bytes_loaded, double bytes_stored, double compute_cycles)
Definition TensorCostModel.h:57
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMax(const TensorOpCost &rhs) const
Definition TensorCostModel.h:106
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator+(TensorOpCost lhs, const TensorOpCost &rhs)
Definition TensorCostModel.h:129
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bytes_stored() const
Definition TensorCostModel.h:77
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator*(double lhs, TensorOpCost rhs)
Definition TensorCostModel.h:139
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int MulCost()
Definition TensorCostModel.h:31
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int ModCost()
Definition TensorCostModel.h:45
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost & operator*=(double rhs)
Definition TensorCostModel.h:122
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost & operator+=(const TensorOpCost &rhs)
Definition TensorCostModel.h:114
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int AddCost()
Definition TensorCostModel.h:36
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE friend TensorOpCost operator*(TensorOpCost lhs, double rhs)
Definition TensorCostModel.h:134
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int CastCost()
Definition TensorCostModel.h:49
EIGEN_DEVICE_FUNC TensorOpCost()
Definition TensorCostModel.h:55
friend std::ostream & operator<<(std::ostream &os, const TensorOpCost &tc)
Definition TensorCostModel.h:145
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bytes_loaded() const
Definition TensorCostModel.h:74
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int DivCost()
Definition TensorCostModel.h:40
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double compute_cycles() const
Definition TensorCostModel.h:80
EIGEN_DEVICE_FUNC TensorOpCost(double bytes_loaded, double bytes_stored, double compute_cycles, bool vectorized, double packet_size)
Definition TensorCostModel.h:63
EIGEN_DEVICE_FUNC void dropMemoryCost()
Definition TensorCostModel.h:91
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double total_cost(double load_cost, double store_cost, double compute_cost) const
Definition TensorCostModel.h:83
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool() isfinite(const Eigen::bfloat16 &h)
Definition BFloat16.h:671
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T maxi(const T &x, const T &y)
Definition MathFunctions.h:1091
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T mini(const T &x, const T &y)
Definition MathFunctions.h:1083
Namespace containing all symbols from the Eigen library.
Definition bench_norm.cpp:85
Definition NumTraits.h:153
Definition XprHelper.h:176
Definition ForwardDeclarations.h:17