IndexType num_copied = 0; \
for (num_copied = 0; num_copied < block_total_size; \
num_copied += dst_inner_dim_size) { \
LinCopy::template Run<KIND>( \
typename LinCopy::Dst(output_offset, output_stride, dst.data), \
typename LinCopy::Src(input_offset, input_stride, src.data), \
dst_inner_dim_size); \
\
for (
int j = 0;
j < idx; ++
j) { \
input_offset += it[
j].input_stride; \
output_offset += it[
j].output_stride; \
break; \
} \
input_offset -= it[
j].input_span; \
output_offset -= it[
j].output_span; \
} \
} \
return num_copied;
Scalar Scalar int size
Definition benchVecAdd.cpp:17
uint8_t count
Definition ref_serial.h:256
std::ptrdiff_t j
Definition tut_arithmetic_redux_minmax.cpp:2