28#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP
29#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSOR_REDUCTION_SYCL_HPP
34template <
typename Op,
typename CoeffReturnType,
typename Index,
bool Vectorizable>
36 typedef typename Vectorise<CoeffReturnType, Eigen::SyclDevice, Vectorizable>::PacketReturnType
PacketReturnType;
46template <
typename CoeffReturnType,
typename Index>
56 return quotient_op(accumulator, CoeffReturnType(
scale));
60template <
typename CoeffReturnType,
typename Index>
62 typedef typename Vectorise<CoeffReturnType, Eigen::SyclDevice, true>::PacketReturnType
PacketReturnType;
74template <
typename CoeffReturnType,
typename OpType,
typename InputAccessor,
typename OutputAccessor,
typename Index,
77 typedef cl::sycl::accessor<CoeffReturnType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
96 const Index localid = itemID.get_local_id(0);
97 auto aInPtr =
aI.get_pointer() + localid;
98 auto aOutPtr =
outAcc.get_pointer();
99 CoeffReturnType *scratchptr =
scratch.get_pointer();
100 CoeffReturnType accumulator = *aInPtr;
102 scratchptr[localid] =
op.finalize(accumulator);
104 itemID.barrier(cl::sycl::access::fence_space::local_space);
106 op.reduce(scratchptr[localid +
offset], &accumulator);
107 scratchptr[localid] =
op.finalize(accumulator);
110 if (localid == 0) *aOutPtr =
op.finalize(accumulator);
116template <
typename Evaluator,
typename OpType,
typename Evaluator::Index local_range>
120 typedef typename Evaluator::Index
Index;
121 typedef OpDefiner<OpType,
typename Evaluator::CoeffReturnType,
Index,
122 (Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess)>
129 typename ::Eigen::internal::conditional<(Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess),
131 typedef cl::sycl::accessor<OutType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
140 Index rng_, OpType op_)
145 template <
bool Vect = (Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess)>
147 const cl::sycl::nd_item<1> &itemID) {
149 Index VectorizedRange = (
rng / Evaluator::PacketSize) * Evaluator::PacketSize;
150 Index globalid = itemID.get_global_id(0);
151 Index localid = itemID.get_local_id(0);
152 Index step = Evaluator::PacketSize * itemID.get_global_range(0);
153 Index start = Evaluator::PacketSize * globalid;
155 PacketReturnType packetAccumulator =
op.template initializePacket<PacketReturnType>();
156 for (
Index i = start;
i < VectorizedRange;
i += step) {
157 op.template reducePacket<PacketReturnType>(
evaluator.impl().template packet<Unaligned>(
i), &packetAccumulator);
159 globalid += VectorizedRange;
161 for (
Index i = globalid;
i <
rng;
i += itemID.get_global_range(0)) {
162 op.template reducePacket<PacketReturnType>(
167 scratch[localid] = packetAccumulator =
172 itemID.barrier(cl::sycl::access::fence_space::local_space);
174 op.template reducePacket<PacketReturnType>(
scratch[localid +
offset], &packetAccumulator);
175 scratch[localid] =
op.template finalizePacket<PacketReturnType>(packetAccumulator);
179 output_ptr[itemID.get_group(0)] =
180 op.finalizeBoth(
op.initialize(),
op.template finalizePacket<PacketReturnType>(packetAccumulator));
184 template <
bool Vect = (Evaluator::ReducerTraits::PacketAccess & Evaluator::InputPacketAccess)>
186 const cl::sycl::nd_item<1> &itemID) {
188 Index globalid = itemID.get_global_id(0);
189 Index localid = itemID.get_local_id(0);
193 for (
Index i = globalid;
i <
rng;
i += itemID.get_global_range(0)) {
201 itemID.barrier(cl::sycl::access::fence_space::local_space);
204 scratch[localid] =
op.finalize(accumulator);
208 output_ptr[itemID.get_group(0)] =
op.finalize(accumulator);
213template <
typename Evaluator,
typename OpType>
218 typedef typename Evaluator::Index
Index;
221 template <
typename Scratch>
224 : evaluator(evaluator_),
225 output_accessor(output_accessor_),
226 functor(
OpDef::get_op(functor_)),
228 num_values_to_reduce(num_values_to_reduce_) {}
231 auto output_accessor_ptr = output_accessor.get_pointer();
233 Index globalid =
static_cast<Index>(itemID.get_global_linear_id());
234 if (globalid < range) {
237 evaluator, evaluator.firstInput(globalid), functor, &accum);
238 output_accessor_ptr[globalid] =
OpDef::finalise_op(functor.finalize(accum), num_values_to_reduce);
247 Index num_values_to_reduce;
252template <
typename Evaluator,
typename OpType,
typename PannelParameters, reduction_dim rt>
256 typedef typename Evaluator::Index
Index;
259 typedef cl::sycl::accessor<CoeffReturnType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
271 const Index preserve_elements_num_groups_,
const Index reduce_elements_num_groups_,
272 const Index num_coeffs_to_preserve_,
const Index num_coeffs_to_reduce_)
289 Index localOffset = globalRId;
292 const Index per_thread_global_stride =
295 op.reduce(
evaluator.impl().coeff(global_offset), &accumulator);
296 localOffset += per_thread_local_stride;
297 global_offset += per_thread_global_stride;
301 const Index linearLocalThreadId = itemID.get_local_id(0);
303 : linearLocalThreadId / PannelParameters::LocalThreadSizeR;
305 : linearLocalThreadId % PannelParameters::LocalThreadSizeR;
311 Index globalPId = pGroupId * PannelParameters::LocalThreadSizeP + pLocalThreadId;
312 const Index globalRId = rGroupId * PannelParameters::LocalThreadSizeR + rLocalThreadId;
313 auto scratchPtr =
scratch.get_pointer().get();
321 scratchPtr[pLocalThreadId + rLocalThreadId * (PannelParameters::LocalThreadSizeP + PannelParameters::BC)] =
324 pLocalThreadId = linearLocalThreadId % PannelParameters::LocalThreadSizeP;
325 rLocalThreadId = linearLocalThreadId / PannelParameters::LocalThreadSizeP;
326 globalPId = pGroupId * PannelParameters::LocalThreadSizeP + pLocalThreadId;
331 auto out_scratch_ptr =
332 scratchPtr + (pLocalThreadId + (rLocalThreadId * (PannelParameters::LocalThreadSizeP + PannelParameters::BC)));
333 itemID.barrier(cl::sycl::access::fence_space::local_space);
335 accumulator = *out_scratch_ptr;
340 if (rLocalThreadId <
offset) {
341 op.reduce(out_scratch_ptr[(PannelParameters::LocalThreadSizeP + PannelParameters::BC) *
offset], &accumulator);
344 *out_scratch_ptr =
op.finalize(accumulator);
351 itemID.barrier(cl::sycl::access::fence_space::local_space);
355 outPtr[globalPId] =
op.finalize(accumulator);
360template <
typename OutScalar,
typename Index,
typename InputAccessor,
typename OutputAccessor,
typename OpType>
364 typedef cl::sycl::accessor<OutScalar, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local>
373 OutputAccessor output_accessor_, OpType op_,
374 const Index num_coeffs_to_preserve_,
375 const Index num_coeffs_to_reduce_)
383 const Index globalId = itemID.get_global_id(0);
389 OutScalar accumulator =
op.initialize();
392 op.reduce(*in_ptr, &accumulator);
399template <
typename Index, Index LTP, Index LTR,
bool BC_>
406template <
typename Self,
typename Op, TensorSycl::
internal::reduction_dim rt>
418 Index num_coeffs_to_reduce,
Index num_coeffs_to_preserve) {
426 "The Local thread size must be a power of 2 for the reduction "
437 const Index reductionPerThread = 64;
438 Index cu = dev.getPowerOfTwo(dev.getNumSyclMultiProcessors(),
true);
440 Index rGroups = (cu + pNumGroups - 1) / pNumGroups;
441 const Index rNumGroups = num_coeffs_to_reduce > reductionPerThread * localRange ? std::min(rGroups, localRange) : 1;
442 const Index globalRange = pNumGroups * rNumGroups * localRange;
446 auto thread_range = cl::sycl::nd_range<1>(cl::sycl::range<1>(globalRange), cl::sycl::range<1>(localRange));
447 if (rNumGroups > 1) {
449 dev.allocate_temp(num_coeffs_to_preserve * rNumGroups *
sizeof(
CoeffReturnType)));
451 dev.template unary_kernel_launcher<CoeffReturnType, SyclReducerKerneType>(
452 self, temp_accessor, thread_range, scratchSize, reducer, pNumGroups, rNumGroups, num_coeffs_to_preserve,
453 num_coeffs_to_reduce);
456 SecondStepPartialReductionKernel;
458 dev.template unary_kernel_launcher<CoeffReturnType, SecondStepPartialReductionKernel>(
459 temp_accessor, output,
460 cl::sycl::nd_range<1>(cl::sycl::range<1>(pNumGroups * localRange), cl::sycl::range<1>(localRange)),
Index(1),
461 reducer, num_coeffs_to_preserve, rNumGroups);
463 self.device().deallocate_temp(temp_pointer);
465 dev.template unary_kernel_launcher<CoeffReturnType, SyclReducerKerneType>(
466 self, output, thread_range, scratchSize, reducer, pNumGroups, rNumGroups, num_coeffs_to_preserve,
467 num_coeffs_to_reduce);
477template <
typename Self,
typename Op,
bool Vectorizable>
487 "The Local thread size must be a power of 2 for the reduction "
491 typename Self::Index
inputSize =
self.impl().dimensions().TotalSize();
505 auto thread_range = cl::sycl::nd_range<1>(cl::sycl::range<1>(global_range), cl::sycl::range<1>(local_range));
531template <
typename Self,
typename Op>
535 static bool run(
const Self &
self,
const Op &reducer,
const Eigen::SyclDevice &dev,
536 typename Self::EvaluatorPointerType
output,
typename Self::Index num_coeffs_to_reduce,
537 typename Self::Index num_coeffs_to_preserve) {
538 return ::Eigen::TensorSycl::internal::PartialReducerLauncher<
540 num_coeffs_to_reduce,
541 num_coeffs_to_preserve);
545template <
typename Self,
typename Op>
549 static bool run(
const Self &
self,
const Op &reducer,
const Eigen::SyclDevice &dev,
550 typename Self::EvaluatorPointerType
output,
typename Self::Index num_coeffs_to_reduce,
551 typename Self::Index num_coeffs_to_preserve) {
552 return ::Eigen::TensorSycl::internal::PartialReducerLauncher<
554 num_coeffs_to_reduce,
555 num_coeffs_to_preserve);
562template <
typename Self,
typename Op>
565 static bool run(
const Self &
self,
const Op &reducer,
const Eigen::SyclDevice &dev,
566 typename Self::EvaluatorPointerType
output,
typename Self::Index num_values_to_reduce,
567 typename Self::Index num_coeffs_to_preserve) {
569 dev.parallel_for_setup(num_coeffs_to_preserve,
tileSize, range,
GRange);
574 reducer, range, (num_values_to_reduce != 0) ? num_values_to_reduce :
static_cast<Index>(1));
int i
Definition BiCGSTAB_step_by_step.cpp:9
#define EIGEN_UNROLL_LOOP
Definition Macros.h:1461
#define EIGEN_CONSTEXPR
Definition Macros.h:787
#define EIGEN_DEVICE_FUNC
Definition Macros.h:976
#define EIGEN_STRONG_INLINE
Definition Macros.h:917
int data[]
Definition Map_placement_new.cpp:1
Definition TensorReductionSycl.h:117
FullReductionKernelFunctor(LocalAccessor scratch_, Evaluator evaluator_, EvaluatorPointerType final_output_, Index rng_, OpType op_)
Definition TensorReductionSycl.h:139
EvaluatorPointerType final_output
Definition TensorReductionSycl.h:135
Evaluator::PacketReturnType PacketReturnType
Definition TensorReductionSycl.h:127
Evaluator::EvaluatorPointerType EvaluatorPointerType
Definition TensorReductionSycl.h:126
cl::sycl::accessor< OutType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local > LocalAccessor
Definition TensorReductionSycl.h:132
typename::Eigen::internal::conditional<(Evaluator::ReducerTraits::PacketAccess &Evaluator::InputPacketAccess), PacketReturnType, CoeffReturnType >::type OutType
Definition TensorReductionSycl.h:130
Evaluator evaluator
Definition TensorReductionSycl.h:134
Evaluator::Index Index
Definition TensorReductionSycl.h:120
Index rng
Definition TensorReductionSycl.h:136
OpDef::type Op
Definition TensorReductionSycl.h:125
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ::Eigen::internal::enable_if<!Vect >::type compute_reduction(const cl::sycl::nd_item< 1 > &itemID)
Definition TensorReductionSycl.h:185
Evaluator::CoeffReturnType CoeffReturnType
Definition TensorReductionSycl.h:119
LocalAccessor scratch
Definition TensorReductionSycl.h:133
void operator()(cl::sycl::nd_item< 1 > itemID)
Definition TensorReductionSycl.h:143
Op op
Definition TensorReductionSycl.h:137
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ::Eigen::internal::enable_if< Vect >::type compute_reduction(const cl::sycl::nd_item< 1 > &itemID)
Definition TensorReductionSycl.h:146
Definition TensorReductionSycl.h:214
GenericNondeterministicReducer(Scratch, Evaluator evaluator_, EvaluatorPointerType output_accessor_, OpType functor_, Index range_, Index num_values_to_reduce_)
Definition TensorReductionSycl.h:222
Evaluator::Index Index
Definition TensorReductionSycl.h:218
void operator()(cl::sycl::nd_item< 1 > itemID)
Definition TensorReductionSycl.h:230
Evaluator::CoeffReturnType CoeffReturnType
Definition TensorReductionSycl.h:216
OpDef::type Op
Definition TensorReductionSycl.h:220
OpDefiner< OpType, CoeffReturnType, Index, false > OpDef
Definition TensorReductionSycl.h:219
Evaluator::EvaluatorPointerType EvaluatorPointerType
Definition TensorReductionSycl.h:217
Definition TensorRef.h:81
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy y set format x g set format y g set format x2 g set format y2 g set format z g set angles radians set nogrid set key title set key left top Right noreverse box linetype linewidth samplen spacing width set nolabel set noarrow set nologscale set logscale x set set pointsize set encoding default set nopolar set noparametric set set set set surface set nocontour set clabel set mapping cartesian set nohidden3d set cntrparam order set cntrparam linear set cntrparam levels auto set cntrparam points set size set set xzeroaxis lt lw set x2zeroaxis lt lw set yzeroaxis lt lw set y2zeroaxis lt lw set tics in set ticslevel set tics scale
Definition gnuplot_common_settings.hh:54
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy y set format x g set format y g set format x2 g set format y2 g set format z g set angles radians set nogrid set key title set key left top Right noreverse box linetype linewidth samplen spacing width set nolabel set noarrow set nologscale set logscale x set set pointsize set encoding default set nopolar set noparametric set set set set surface set nocontour set clabel set mapping cartesian set nohidden3d set cntrparam order set cntrparam linear set cntrparam levels auto set cntrparam points set size set set xzeroaxis lt lw set x2zeroaxis lt lw set yzeroaxis lt lw set y2zeroaxis lt lw set tics in set ticslevel set tics set mxtics default set mytics default set mx2tics default set my2tics default set xtics border mirror norotate autofreq set ytics border mirror norotate autofreq set ztics border nomirror norotate autofreq set nox2tics set noy2tics set timestamp bottom norotate offset
Definition gnuplot_common_settings.hh:64
reduction_dim
Definition TensorReductionSycl.h:250
Namespace containing all symbols from the Eigen library.
Definition bench_norm.cpp:85
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition Meta.h:74
Definition BandTriangularSolver.h:13
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE type get_op(Eigen::internal::MeanReducer< CoeffReturnType > &)
Definition TensorReductionSycl.h:64
Vectorise< CoeffReturnType, Eigen::SyclDevice, true >::PacketReturnType PacketReturnType
Definition TensorReductionSycl.h:62
Eigen::internal::SumReducer< CoeffReturnType > type
Definition TensorReductionSycl.h:63
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType finalise_op(const PacketReturnType &accumulator, const Index &scale)
Definition TensorReductionSycl.h:68
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE type get_op(Eigen::internal::MeanReducer< CoeffReturnType > &)
Definition TensorReductionSycl.h:49
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType finalise_op(const CoeffReturnType &accumulator, const Index &scale)
Definition TensorReductionSycl.h:53
Eigen::internal::SumReducer< CoeffReturnType > type
Definition TensorReductionSycl.h:48
Definition TensorReductionSycl.h:35
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE type get_op(Op &op)
Definition TensorReductionSycl.h:38
Op type
Definition TensorReductionSycl.h:37
Vectorise< CoeffReturnType, Eigen::SyclDevice, Vectorizable >::PacketReturnType PacketReturnType
Definition TensorReductionSycl.h:36
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType finalise_op(const PacketReturnType &accumulator, const Index &)
Definition TensorReductionSycl.h:40
Definition InteropHeaders.h:130
Definition TensorReductionSycl.h:407
Self::EvaluatorPointerType EvaluatorPointerType
Definition TensorReductionSycl.h:408
static bool run(const Self &self, const Op &reducer, const Eigen::SyclDevice &dev, EvaluatorPointerType output, Index num_coeffs_to_reduce, Index num_coeffs_to_preserve)
Definition TensorReductionSycl.h:417
Self::Index Index
Definition TensorReductionSycl.h:411
Self::CoeffReturnType CoeffReturnType
Definition TensorReductionSycl.h:409
PartialReductionKernel< Self, Op, PannelParameters, rt > SyclReducerKerneType
Definition TensorReductionSycl.h:415
Self::Storage Storage
Definition TensorReductionSycl.h:410
ReductionPannel< typename Self::Index, EIGEN_SYCL_LOCAL_THREAD_DIM0, EIGEN_SYCL_LOCAL_THREAD_DIM1, true > PannelParameters
Definition TensorReductionSycl.h:413
Definition TensorReductionSycl.h:253
const Index preserve_elements_num_groups
Definition TensorReductionSycl.h:265
Evaluator::Index Index
Definition TensorReductionSycl.h:256
const Index num_coeffs_to_reduce
Definition TensorReductionSycl.h:268
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void element_wise_reduce(Index globalRId, Index globalPId, CoeffReturnType &accumulator)
Definition TensorReductionSycl.h:282
cl::sycl::accessor< CoeffReturnType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local > ScratchAcc
Definition TensorReductionSycl.h:260
ScratchAcc scratch
Definition TensorReductionSycl.h:261
const Index num_coeffs_to_preserve
Definition TensorReductionSycl.h:267
EvaluatorPointerType output_accessor
Definition TensorReductionSycl.h:263
OpDef::type Op
Definition TensorReductionSycl.h:258
const Index reduce_elements_num_groups
Definition TensorReductionSycl.h:266
Evaluator::EvaluatorPointerType EvaluatorPointerType
Definition TensorReductionSycl.h:255
PartialReductionKernel(ScratchAcc scratch_, Evaluator evaluator_, EvaluatorPointerType output_accessor_, OpType op_, const Index preserve_elements_num_groups_, const Index reduce_elements_num_groups_, const Index num_coeffs_to_preserve_, const Index num_coeffs_to_reduce_)
Definition TensorReductionSycl.h:270
Op op
Definition TensorReductionSycl.h:264
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(cl::sycl::nd_item< 1 > itemID)
Definition TensorReductionSycl.h:300
Evaluator evaluator
Definition TensorReductionSycl.h:262
Evaluator::CoeffReturnType CoeffReturnType
Definition TensorReductionSycl.h:254
OpDefiner< OpType, CoeffReturnType, Index, false > OpDef
Definition TensorReductionSycl.h:257
Definition TensorReductionSycl.h:400
static EIGEN_CONSTEXPR Index LocalThreadSizeR
Definition TensorReductionSycl.h:402
static EIGEN_CONSTEXPR Index LocalThreadSizeP
Definition TensorReductionSycl.h:401
static EIGEN_CONSTEXPR bool BC
Definition TensorReductionSycl.h:403
Definition TensorReductionSycl.h:76
SecondStepFullReducer(LocalAccessor scratch_, InputAccessor aI_, OutputAccessor outAcc_, OpType op_)
Definition TensorReductionSycl.h:85
void operator()(cl::sycl::nd_item< 1 > itemID)
Definition TensorReductionSycl.h:88
OpDefiner< OpType, CoeffReturnType, Index, true > OpDef
Definition TensorReductionSycl.h:79
cl::sycl::accessor< CoeffReturnType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local > LocalAccessor
Definition TensorReductionSycl.h:78
Op op
Definition TensorReductionSycl.h:84
OutputAccessor outAcc
Definition TensorReductionSycl.h:83
OpDef::type Op
Definition TensorReductionSycl.h:80
LocalAccessor scratch
Definition TensorReductionSycl.h:81
InputAccessor aI
Definition TensorReductionSycl.h:82
Definition TensorReductionSycl.h:361
OutputAccessor output_accessor
Definition TensorReductionSycl.h:367
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(cl::sycl::nd_item< 1 > itemID)
Definition TensorReductionSycl.h:382
const Index num_coeffs_to_preserve
Definition TensorReductionSycl.h:369
cl::sycl::accessor< OutScalar, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local > ScratchAccessor
Definition TensorReductionSycl.h:365
OpDefiner< OpType, OutScalar, Index, false > OpDef
Definition TensorReductionSycl.h:362
const Index num_coeffs_to_reduce
Definition TensorReductionSycl.h:370
Op op
Definition TensorReductionSycl.h:368
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE SecondStepPartialReduction(ScratchAccessor, InputAccessor input_accessor_, OutputAccessor output_accessor_, OpType op_, const Index num_coeffs_to_preserve_, const Index num_coeffs_to_reduce_)
Definition TensorReductionSycl.h:372
InputAccessor input_accessor
Definition TensorReductionSycl.h:366
OpDef::type Op
Definition TensorReductionSycl.h:363
Self::CoeffReturnType CoeffReturnType
Definition TensorReductionSycl.h:479
static void run(const Self &self, Op &reducer, const Eigen::SyclDevice &dev, EvaluatorPointerType data)
Definition TensorReductionSycl.h:483
Self::EvaluatorPointerType EvaluatorPointerType
Definition TensorReductionSycl.h:480
Definition TensorReduction.h:311
static const bool HasOptimizedImplementation
Definition TensorReduction.h:312
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self &self, typename Self::Index firstIndex, Op &reducer, typename Self::CoeffReturnType *accum)
Definition TensorReduction.h:144
static bool run(const Self &self, const Op &reducer, const Eigen::SyclDevice &dev, typename Self::EvaluatorPointerType output, typename Self::Index num_values_to_reduce, typename Self::Index num_coeffs_to_preserve)
Definition TensorReductionSycl.h:565
static bool run(const Self &self, const Op &reducer, const Eigen::SyclDevice &dev, typename Self::EvaluatorPointerType output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_coeffs_to_preserve)
Definition TensorReductionSycl.h:549
Definition TensorReduction.h:395
static const bool HasOptimizedImplementation
Definition TensorReduction.h:396
static EIGEN_DEVICE_FUNC bool run(const Self &, Op &, const Device &, typename Self::CoeffReturnType *, typename Self::Index, typename Self::Index)
Definition TensorReduction.h:398
Definition TensorFunctors.h:112
static bool run(const Self &self, const Op &reducer, const Eigen::SyclDevice &dev, typename Self::EvaluatorPointerType output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_coeffs_to_preserve)
Definition TensorReductionSycl.h:535
Definition TensorReduction.h:406
static const bool HasOptimizedImplementation
Definition TensorReduction.h:407
static EIGEN_DEVICE_FUNC bool run(const Self &, Op &, const Device &, typename Self::CoeffReturnType *, typename Self::Index, typename Self::Index)
Definition TensorReduction.h:409
Definition ForwardDeclarations.h:17