10#ifndef EIGEN_CXX11_TENSOR_TENSOR_FFT_H
11#define EIGEN_CXX11_TENSOR_TENSOR_FFT_H
35 std::complex<T>
operator() (
const T& val)
const {
return std::complex<T>(val, 0); }
41 std::complex<T>
operator() (
const std::complex<T>& val)
const {
return val; }
44template <
int ResultType>
struct PartOf {
45 template <
typename T>
T operator() (
const T& val)
const {
return val; }
49 template <
typename T>
T operator() (
const std::complex<T>& val)
const {
return val.real(); }
53 template <
typename T>
T operator() (
const std::complex<T>& val)
const {
return val.imag(); }
57template <
typename FFT,
typename XprType,
int FFTResultType,
int FFTDir>
65 typedef typename XprTraits::Index
Index;
66 typedef typename XprType::Nested
Nested;
68 static const int NumDimensions = XprTraits::NumDimensions;
69 static const int Layout = XprTraits::Layout;
73template <
typename FFT,
typename XprType,
int FFTResultType,
int FFTDirection>
78template <
typename FFT,
typename XprType,
int FFTResultType,
int FFTDirection>
85template <
typename FFT,
typename XprType,
int FFTResultType,
int FFTDir>
114template <
typename FFT,
typename ArgType,
typename Device,
int FFTResultType,
int FFTDir>
149 for (
int i = 0;
i < NumDims; ++
i) {
151 m_dimensions[
i] = input_dims[
i];
156 for (
int i = 1;
i < NumDims; ++
i) {
157 m_strides[
i] = m_strides[
i - 1] * m_dimensions[
i - 1];
160 m_strides[NumDims - 1] = 1;
161 for (
int i = NumDims - 2;
i >= 0; --
i) {
162 m_strides[
i] = m_strides[
i + 1] * m_dimensions[
i + 1];
165 m_size = m_dimensions.TotalSize();
173 m_impl.evalSubExprsIfNeeded(NULL);
196 template <
int LoadMode>
218 ComplexScalar* buf = write_to_out ? (ComplexScalar*)
data : (ComplexScalar*)
m_device.allocate(sizeof(ComplexScalar) * m_size);
220 for (
Index i = 0;
i < m_size; ++
i) {
221 buf[
i] = MakeComplex<internal::is_same<InputScalar, RealScalar>::value>()(m_impl.coeff(
i));
224 for (
size_t i = 0;
i < m_fft.size(); ++
i) {
227 Index line_len = m_dimensions[dim];
229 ComplexScalar* line_buf = (ComplexScalar*)
m_device.allocate(
sizeof(ComplexScalar) * line_len);
230 const bool is_power_of_two = isPowerOfTwo(line_len);
231 const Index good_composite = is_power_of_two ? 0 : findGoodComposite(line_len);
232 const Index log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite);
234 ComplexScalar*
a = is_power_of_two ? NULL : (ComplexScalar*)
m_device.allocate(
sizeof(ComplexScalar) * good_composite);
235 ComplexScalar*
b = is_power_of_two ? NULL : (ComplexScalar*)
m_device.allocate(
sizeof(ComplexScalar) * good_composite);
236 ComplexScalar* pos_j_base_powered = is_power_of_two ? NULL : (ComplexScalar*)
m_device.allocate(
sizeof(ComplexScalar) * (line_len + 1));
237 if (!is_power_of_two) {
264 for (
int j = 0;
j < line_len + 1; ++
j) {
267 pos_j_base_powered[
j] =
static_cast<ComplexScalar
>(tmp);
271 for (
Index partial_index = 0; partial_index < m_size / line_len; ++partial_index) {
272 const Index base_offset = getBaseOffsetFromIndex(partial_index, dim);
275 const Index stride = m_strides[dim];
277 m_device.memcpy(line_buf, &buf[base_offset], line_len*
sizeof(ComplexScalar));
280 for (
int j = 0;
j < line_len; ++
j,
offset += stride) {
286 if (is_power_of_two) {
287 processDataLineCooleyTukey(line_buf, line_len, log_len);
290 processDataLineBluestein(line_buf, line_len, good_composite, log_len,
a,
b, pos_j_base_powered);
295 m_device.memcpy(&buf[base_offset], line_buf, line_len*
sizeof(ComplexScalar));
298 const ComplexScalar div_factor = ComplexScalar(1.0 / line_len, 0);
299 for (
int j = 0;
j < line_len; ++
j,
offset += stride) {
305 if (!is_power_of_two) {
308 m_device.deallocate(pos_j_base_powered);
313 for (
Index i = 0;
i < m_size; ++
i) {
314 data[
i] = PartOf<FFTResultType>()(buf[
i]);
322 return !(
x & (
x - 1));
328 while (
i < 2 *
n - 1)
i *= 2;
334 while (
m >>= 1) log2m++;
341 scramble_FFT(line_buf, line_len);
342 compute_1D_Butterfly<FFTDir>(line_buf, line_len, log_len);
349 ComplexScalar*
data = line_buf;
353 a[
i] =
data[
i] * numext::conj(pos_j_base_powered[
i]);
356 a[
i] =
data[
i] * pos_j_base_powered[
i];
360 a[
i] = ComplexScalar(0, 0);
365 b[
i] = pos_j_base_powered[
i];
368 b[
i] = numext::conj(pos_j_base_powered[
i]);
372 b[
i] = ComplexScalar(0, 0);
376 b[
i] = pos_j_base_powered[
m-
i];
379 b[
i] = numext::conj(pos_j_base_powered[
m-
i]);
384 compute_1D_Butterfly<FFT_FORWARD>(
a,
m, log_len);
387 compute_1D_Butterfly<FFT_FORWARD>(
b,
m, log_len);
394 compute_1D_Butterfly<FFT_REVERSE>(
a,
m, log_len);
403 data[
i] =
a[
i] * numext::conj(pos_j_base_powered[
i]);
406 data[
i] =
a[
i] * pos_j_base_powered[
i];
419 while (
m >= 2 &&
j >
m) {
429 ComplexScalar tmp =
data[1];
436 ComplexScalar tmp[4];
441 tmp[3] = ComplexScalar(0.0, -1.0) * (
data[2] -
data[3]);
443 tmp[3] = ComplexScalar(0.0, 1.0) * (
data[2] -
data[3]);
445 data[0] = tmp[0] + tmp[2];
446 data[1] = tmp[1] + tmp[3];
447 data[2] = tmp[0] - tmp[2];
448 data[3] = tmp[1] - tmp[3];
453 ComplexScalar tmp_1[8];
454 ComplexScalar tmp_2[8];
460 tmp_1[3] = (
data[2] -
data[3]) * ComplexScalar(0, -1);
462 tmp_1[3] = (
data[2] -
data[3]) * ComplexScalar(0, 1);
468 tmp_1[7] = (
data[6] -
data[7]) * ComplexScalar(0, -1);
470 tmp_1[7] = (
data[6] -
data[7]) * ComplexScalar(0, 1);
472 tmp_2[0] = tmp_1[0] + tmp_1[2];
473 tmp_2[1] = tmp_1[1] + tmp_1[3];
474 tmp_2[2] = tmp_1[0] - tmp_1[2];
475 tmp_2[3] = tmp_1[1] - tmp_1[3];
476 tmp_2[4] = tmp_1[4] + tmp_1[6];
478#define SQRT2DIV2 0.7071067811865476
481 tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, -1);
485 tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, 1);
488 data[0] = tmp_2[0] + tmp_2[4];
489 data[1] = tmp_2[1] + tmp_2[5];
490 data[2] = tmp_2[2] + tmp_2[6];
491 data[3] = tmp_2[3] + tmp_2[7];
492 data[4] = tmp_2[0] - tmp_2[4];
493 data[5] = tmp_2[1] - tmp_2[5];
494 data[6] = tmp_2[2] - tmp_2[6];
495 data[7] = tmp_2[3] - tmp_2[7];
504 const RealScalar wtemp = m_sin_PI_div_n_LUT[n_power_of_2];
506 ? m_minus_sin_2_PI_div_n_LUT[n_power_of_2]
507 : -m_minus_sin_2_PI_div_n_LUT[n_power_of_2];
509 const ComplexScalar wp(wtemp, wpi);
510 const ComplexScalar wp_one = wp + ComplexScalar(1, 0);
511 const ComplexScalar wp_one_2 = wp_one * wp_one;
512 const ComplexScalar wp_one_3 = wp_one_2 * wp_one;
513 const ComplexScalar wp_one_4 = wp_one_3 * wp_one;
515 ComplexScalar
w(1.0, 0.0);
517 ComplexScalar temp0(
data[
i + n2] *
w);
518 ComplexScalar temp1(
data[
i + 1 + n2] *
w * wp_one);
519 ComplexScalar temp2(
data[
i + 2 + n2] *
w * wp_one_2);
520 ComplexScalar temp3(
data[
i + 3 + n2] *
w * wp_one_3);
527 data[
i + 1] += temp1;
530 data[
i + 2] += temp2;
533 data[
i + 3] += temp3;
542 compute_1D_Butterfly<Dir>(
data,
n / 2, n_power_of_2 - 1);
543 compute_1D_Butterfly<Dir>(
data +
n / 2,
n / 2, n_power_of_2 - 1);
544 butterfly_1D_merge<Dir>(
data,
n, n_power_of_2);
546 butterfly_8<Dir>(
data);
548 butterfly_4<Dir>(
data);
550 butterfly_2<Dir>(
data);
558 for (
int i = NumDims - 1;
i > omitted_dim; --
i) {
559 const Index partial_m_stride = m_strides[
i] / m_dimensions[omitted_dim];
560 const Index idx = index / partial_m_stride;
561 index -= idx * partial_m_stride;
562 result += idx * m_strides[
i];
567 for (
Index i = 0;
i < omitted_dim; ++
i) {
568 const Index partial_m_stride = m_strides[
i] / m_dimensions[omitted_dim];
569 const Index idx = index / partial_m_stride;
570 index -= idx * partial_m_stride;
571 result += idx * m_strides[
i];
580 Index result = base +
offset * m_strides[omitted_dim] ;
Matrix3f m
Definition AngleAxis_mimic_euler.cpp:1
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArgReturnType arg() const
Definition ArrayCwiseUnaryOps.h:66
ArrayXXi a
Definition Array_initializer_list_23_cxx11.cpp:1
int n
Definition BiCGSTAB_simple.cpp:1
int i
Definition BiCGSTAB_step_by_step.cpp:9
#define EIGEN_PI
Definition MathFunctions.h:16
#define EIGEN_ALWAYS_INLINE
Definition Macros.h:932
#define EIGEN_DEVICE_FUNC
Definition Macros.h:976
#define eigen_assert(x)
Definition Macros.h:1037
#define EIGEN_STRONG_INLINE
Definition Macros.h:917
RowVector3d w
Definition Matrix_resize_int.cpp:3
#define EIGEN_DEVICE_REF
Definition TensorMacros.h:50
Scalar * b
Definition benchVecAdd.cpp:17
NumTraits< Scalar >::Real RealScalar
Definition bench_gemm.cpp:47
Generic expression where a coefficient-wise binary operator is applied to two expressions.
Definition CwiseBinaryOp.h:84
The tensor base class.
Definition TensorBase.h:973
Definition TensorFFT.h:86
const FFT m_fft
Definition TensorFFT.h:110
Eigen::internal::nested< TensorFFTOp >::type Nested
Definition TensorFFT.h:93
OutputScalar CoeffReturnType
Definition TensorFFT.h:92
Eigen::NumTraits< Scalar >::Real RealScalar
Definition TensorFFT.h:89
Eigen::internal::traits< TensorFFTOp >::Scalar Scalar
Definition TensorFFT.h:88
EIGEN_DEVICE_FUNC const FFT & fft() const
Definition TensorFFT.h:101
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFFTOp(const XprType &expr, const FFT &fft)
Definition TensorFFT.h:97
internal::conditional< FFTResultType==RealPart||FFTResultType==ImagPart, RealScalar, ComplexScalar >::type OutputScalar
Definition TensorFFT.h:91
Eigen::internal::traits< TensorFFTOp >::StorageKind StorageKind
Definition TensorFFT.h:94
Eigen::internal::traits< TensorFFTOp >::Index Index
Definition TensorFFT.h:95
EIGEN_DEVICE_FUNC const internal::remove_all< typenameXprType::Nested >::type & expression() const
Definition TensorFFT.h:104
std::complex< RealScalar > ComplexScalar
Definition TensorFFT.h:90
XprType::Nested m_xpr
Definition TensorFFT.h:109
Definition TensorCostModel.h:25
Definition EmulateArray.h:21
Definition TensorBlock.h:617
Definition TensorRef.h:81
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy x
Definition gnuplot_common_settings.hh:12
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy y set format x g set format y g set format x2 g set format y2 g set format z g set angles radians set nogrid set key title set key left top Right noreverse box linetype linewidth samplen spacing width set nolabel set noarrow set nologscale set logscale x set set pointsize set encoding default set nopolar set noparametric set set set set surface set nocontour set clabel set mapping cartesian set nohidden3d set cntrparam order set cntrparam linear set cntrparam levels auto set cntrparam points set size set set xzeroaxis lt lw set x2zeroaxis lt lw set yzeroaxis lt lw set y2zeroaxis lt lw set tics in set ticslevel set tics set mxtics default set mytics default set mx2tics default set my2tics default set xtics border mirror norotate autofreq set ytics border mirror norotate autofreq set ztics border nomirror norotate autofreq set nox2tics set noy2tics set timestamp bottom norotate offset
Definition gnuplot_common_settings.hh:64
@ ColMajor
Definition Constants.h:319
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T cos(const T &x)
Definition MathFunctions.h:1600
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T sin(const T &x)
Definition MathFunctions.h:1619
Namespace containing all symbols from the Eigen library.
Definition bench_norm.cpp:85
FFTDirection
Definition TensorForwardDeclarations.h:138
@ FFT_FORWARD
Definition TensorForwardDeclarations.h:139
FFTResultType
Definition TensorForwardDeclarations.h:132
@ ImagPart
Definition TensorForwardDeclarations.h:134
@ RealPart
Definition TensorForwardDeclarations.h:133
Definition BandTriangularSolver.h:13
Definition TensorDimensions.h:263
Definition Constants.h:507
Definition TensorFFT.h:26
EIGEN_DEVICE_FUNC T operator()(const T &val) const
Definition TensorFFT.h:29
Definition TensorFFT.h:44
T operator()(const T &val) const
Definition TensorFFT.h:45
Definition TensorForwardDeclarations.h:37
internal::TensorBlockNotImplemented TensorBlock
Definition TensorFFT.h:144
EIGEN_DEVICE_FUNC EvaluatorPointerType data() const
Definition TensorFFT.h:207
const Device EIGEN_DEVICE_REF m_device
Definition TensorFFT.h:591
EvaluatorPointerType m_data
Definition TensorFFT.h:590
internal::traits< XprType > XprTraits
Definition TensorFFT.h:124
Storage::Type EvaluatorPointerType
Definition TensorFFT.h:131
internal::conditional< FFTResultType==RealPart||FFTResultType==ImagPart, RealScalar, ComplexScalar >::type OutputScalar
Definition TensorFFT.h:126
TensorEvaluator< ArgType, Device > m_impl
Definition TensorFFT.h:589
TensorFFTOp< FFT, ArgType, FFTResultType, FFTDir > XprType
Definition TensorFFT.h:116
Dimensions m_dimensions
Definition TensorFFT.h:587
EIGEN_STRONG_INLINE TensorEvaluator(const XprType &op, const Device &device)
Definition TensorFFT.h:147
Index m_size
Definition TensorFFT.h:585
const FFT EIGEN_DEVICE_REF m_fft
Definition TensorFFT.h:586
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const
Definition TensorFFT.h:192
std::complex< RealScalar > ComplexScalar
Definition TensorFFT.h:122
DSizes< Index, NumDims > Dimensions
Definition TensorFFT.h:119
Eigen::NumTraits< Scalar >::Real RealScalar
Definition TensorFFT.h:121
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const
Definition TensorFFT.h:203
EIGEN_STRONG_INLINE void cleanup()
Definition TensorFFT.h:184
array< Index, NumDims > m_strides
Definition TensorFFT.h:588
XprTraits::Scalar InputScalar
Definition TensorFFT.h:125
XprType::Scalar Scalar
Definition TensorFFT.h:120
TensorEvaluator< ArgType, Device >::Dimensions InputDimensions
Definition TensorFFT.h:123
StorageMemory< CoeffReturnType, Device > Storage
Definition TensorFFT.h:130
EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data)
Definition TensorFFT.h:172
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions & dimensions() const
Definition TensorFFT.h:168
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const
Definition TensorFFT.h:198
OutputScalar CoeffReturnType
Definition TensorFFT.h:127
XprType::Index Index
Definition TensorFFT.h:117
PacketType< OutputScalar, Device >::type PacketReturnType
Definition TensorFFT.h:128
A cost model used to limit the number of threads used for evaluating tensor expression.
Definition TensorEvaluator.h:29
const Device EIGEN_DEVICE_REF m_device
Definition TensorEvaluator.h:192
Storage::Type EvaluatorPointerType
Definition TensorEvaluator.h:39
PacketType< CoeffReturnType, Device >::type PacketReturnType
Definition TensorEvaluator.h:33
EIGEN_DEVICE_FUNC EvaluatorPointerType data() const
Definition TensorEvaluator.h:181
@ BlockAccess
Definition TensorEvaluator.h:48
@ PreferBlockAccess
Definition TensorEvaluator.h:49
@ PacketAccess
Definition TensorEvaluator.h:47
@ Layout
Definition TensorEvaluator.h:50
@ IsAligned
Definition TensorEvaluator.h:46
Derived::Index Index
Definition TensorEvaluator.h:30
Derived::Dimensions Dimensions
Definition TensorEvaluator.h:34
EvaluatorPointerType m_data
Definition TensorEvaluator.h:190
static const int PacketSize
Definition TensorEvaluator.h:36
const TensorFFTOp< FFT, XprType, FFTResultType, FFTDirection > & type
Definition TensorFFT.h:75
Definition XprHelper.h:332
@ value
Definition Meta.h:148
TensorFFTOp< FFT, XprType, FFTResultType, FFTDirection > type
Definition TensorFFT.h:80
Definition TensorTraits.h:175
traits< XprType > XprTraits
Definition TensorFFT.h:59
remove_reference< Nested >::type _Nested
Definition TensorFFT.h:67
XprTraits::Index Index
Definition TensorFFT.h:65
traits< XprType >::PointerType PointerType
Definition TensorFFT.h:70
XprTraits::Scalar InputScalar
Definition TensorFFT.h:62
std::complex< RealScalar > ComplexScalar
Definition TensorFFT.h:61
XprType::Nested Nested
Definition TensorFFT.h:66
conditional< FFTResultType==RealPart||FFTResultType==ImagPart, RealScalar, ComplexScalar >::type OutputScalar
Definition TensorFFT.h:63
NumTraits< typenameXprTraits::Scalar >::Real RealScalar
Definition TensorFFT.h:60
XprTraits::StorageKind StorageKind
Definition TensorFFT.h:64
Definition ForwardDeclarations.h:17
Definition GenericPacketMath.h:133
std::ptrdiff_t j
Definition tut_arithmetic_redux_minmax.cpp:2