TR-mbed 1.0
Loading...
Searching...
No Matches
Complex.h
Go to the documentation of this file.
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
5//
6// This Source Code Form is subject to the terms of the Mozilla
7// Public License v. 2.0. If a copy of the MPL was not distributed
8// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10#ifndef EIGEN_COMPLEX_SSE_H
11#define EIGEN_COMPLEX_SSE_H
12
13namespace Eigen {
14
15namespace internal {
16
17//---------- float ----------
// Packet type wrapping a single Packet4f register (`v`). The unpacket_traits
// specialization for this type declares type = std::complex<float>, size = 2.
// NOTE(review): this listing jumps from line 19 to line 21; the symbol index at
// the end of the page lists a default constructor `Packet2cf()` at
// Complex.h:20 -- confirm against the original header.
18struct Packet2cf
19{
// Explicit: a raw __m128 is never implicitly converted into a Packet2cf.
21 EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& a) : v(a) {}
// Underlying register holding the packet's data.
22 Packet4f v;
23};
24
25// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
26// to leverage AVX instructions.
27#ifndef EIGEN_VECTORIZE_AVX
// Packet traits for std::complex<float>. This specialization is only compiled
// when EIGEN_VECTORIZE_AVX is not defined (see the enclosing #ifndef).
// NOTE(review): listing line 34 is absent from this extraction -- confirm the
// full enum against the original header.
28template<> struct packet_traits<std::complex<float> > : default_packet_traits
29{
30 typedef Packet2cf type; // packet type used for std::complex<float>
31 typedef Packet2cf half; // same type as `type`; HasHalfPacket is 0 below
32 enum {
33 Vectorizable = 1,
35 size = 2, // number of std::complex<float> values per packet
36 HasHalfPacket = 0,
37
// Per-operation flags: 1 for add/sub/mul/div/negate/sqrt/blend, 0 for
// abs/abs2/min/max/setlinear. Presumably generic code consults these to decide
// whether a packet implementation may be used -- confirm in GenericPacketMath.h.
38 HasAdd = 1,
39 HasSub = 1,
40 HasMul = 1,
41 HasDiv = 1,
42 HasNegate = 1,
43 HasSqrt = 1,
44 HasAbs = 0,
45 HasAbs2 = 0,
46 HasMin = 0,
47 HasMax = 0,
48 HasSetLinear = 0,
49 HasBlend = 1
50 };
51};
52#endif
53
// Maps the packet type back to its scalar: a Packet2cf carries
// std::complex<float> values, two per packet, and its `half` type is
// Packet2cf itself.
// NOTE(review): listing lines 57, 60 and 62-63 are absent from this
// extraction; the symbol index at the end of the page mentions `as_real`,
// `alignment`, `masked_load_available` and `masked_store_available` members --
// confirm against the original header.
54template<> struct unpacket_traits<Packet2cf> {
55 typedef std::complex<float> type;
56 typedef Packet2cf half;
58 enum {
59 size=2,
61 vectorizable=true,
64 };
65};
66
69
71{
72 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
73 return Packet2cf(_mm_xor_ps(a.v,mask));
74}
76{
77 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
78 return Packet2cf(_mm_xor_ps(a.v,mask));
79}
80
82{
83 #ifdef EIGEN_VECTORIZE_SSE3
86 vec4f_swizzle1(b.v, 1, 0, 3, 2))));
87// return Packet2cf(_mm_addsub_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
88// _mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
89// vec4f_swizzle1(b.v, 1, 0, 3, 2))));
90 #else
91 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
92 return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
94 vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
95 #endif
96}
97
103
106
// Broadcasts one std::complex<float> into both halves of a Packet2cf.
// The 8-byte complex value is read through a `double const*` so that a single
// 64-bit load picks up the real and imaginary parts together.
// NOTE(review): `res` is used below but its declaration (listing line 109) is
// not present in this extraction -- confirm against the original header.
107template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
108{
110#ifdef EIGEN_VECTORIZE_SSE3
// SSE3 path: load the 64-bit value and duplicate it into both halves at once.
111 res.v = _mm_castpd_ps(_mm_loaddup_pd(reinterpret_cast<double const*>(&from)));
112#else
// Fallback: load into the low 64 bits, then copy the low half into the high half.
113 res.v = _mm_castpd_ps(_mm_load_sd(reinterpret_cast<double const*>(&from)));
114 res.v = _mm_movelh_ps(res.v, res.v);
115#endif
116 return res;
117}
118
// Forwards to pset1<Packet2cf>(*from): only the first element at `from` is
// read, and pset1 replicates it into both complex slots of the packet.
119template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
120
123
124
// Gathers the two std::complex<float> values at from[0] and from[stride] into
// one Packet2cf.
// _mm_set_ps takes its arguments from the highest lane down to the lowest, so
// the resulting lane order (low to high) is:
//   real(from[0]), imag(from[0]), real(from[stride]), imag(from[stride]).
125template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
126{
127 return Packet2cf(_mm_set_ps(std::imag(from[1*stride]), std::real(from[1*stride]),
128 std::imag(from[0*stride]), std::real(from[0*stride])));
129}
130
131template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
132{
133 to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)),
135 to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 2)),
137}
138
140
// Returns the first std::complex<float> stored in the packet (the low 64 bits
// of a.v).
141template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
142{
143 #if EIGEN_GNUC_AT_MOST(4,3)
144 // Workaround for a gcc 4.2 ICE - not ideal performance-wise, but acceptable for such old compilers.
145 // This workaround also fixes invalid code generation with gcc 4.3.
// Spill the whole register to a 16-byte aligned buffer and return element 0.
146 EIGEN_ALIGN16 std::complex<float> res[2];
147 _mm_store_ps((float*)res, a.v);
148 return res[0];
149 #else
// Store only the low two floats (one complex value) directly into the result.
150 std::complex<float> res;
151 _mm_storel_pi((__m64*)&res, a.v);
152 return res;
153 #endif
154}
155
157
// Sums the two complex values of the packet.
// _mm_movehl_ps(a.v,a.v) copies the high two floats (second complex) into the
// low two lanes; after the add, the low half holds first + second, which
// pfirst then extracts.
158template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
159{
160 return pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v,a.v))));
161}
162
// Multiplies the two complex values of the packet together.
// The second operand is a copy of `a` whose low half holds the second complex
// value (via _mm_movehl_ps), so the first element of the pmul result is
// first * second; pfirst extracts it.
163template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
164{
165 return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
166}
167
169{
170 return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
171}
172
174
176{
177 // TODO optimize it for SSE3 and 4
178 Packet2cf res = pmul(a, pconj(b));
179 __m128 s = _mm_mul_ps(b.v,b.v);
180 return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,vec4f_swizzle1(s, 1, 0, 3, 2))));
181}
182
183
184
185//---------- double ----------
// Packet type wrapping a single Packet2d register (`v`). The unpacket_traits
// specialization for this type declares type = std::complex<double>, size = 1.
// NOTE(review): this listing jumps from line 187 to line 189; the symbol index
// at the end of the page lists a default constructor `Packet1cd()` at
// Complex.h:188 -- confirm against the original header.
186struct Packet1cd
187{
// Explicit: a raw __m128d is never implicitly converted into a Packet1cd.
189 EIGEN_STRONG_INLINE explicit Packet1cd(const __m128d& a) : v(a) {}
// Underlying register holding the packet's data.
190 Packet2d v;
191};
192
193// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
194// to leverage AVX instructions.
195#ifndef EIGEN_VECTORIZE_AVX
// Packet traits for std::complex<double>. This specialization is only compiled
// when EIGEN_VECTORIZE_AVX is not defined (see the enclosing #ifndef).
// NOTE(review): listing lines 198-199 are absent from this extraction
// (presumably the `type` / `half` typedefs) -- confirm against the original
// header.
196template<> struct packet_traits<std::complex<double> > : default_packet_traits
197{
200 enum {
201 Vectorizable = 1,
202 AlignedOnScalar = 0,
203 size = 1, // one std::complex<double> per packet
204 HasHalfPacket = 0,
205
// Per-operation flags: 1 for add/sub/mul/div/negate/sqrt, 0 for
// abs/abs2/min/max/setlinear. Presumably generic code consults these to decide
// whether a packet implementation may be used -- confirm in GenericPacketMath.h.
206 HasAdd = 1,
207 HasSub = 1,
208 HasMul = 1,
209 HasDiv = 1,
210 HasNegate = 1,
211 HasSqrt = 1,
212 HasAbs = 0,
213 HasAbs2 = 0,
214 HasMin = 0,
215 HasMax = 0,
216 HasSetLinear = 0
217 };
218};
219#endif
220
// Maps the packet type back to its scalar: a Packet1cd carries exactly one
// std::complex<double>.
// NOTE(review): listing lines 223-224, 227 and 229-230 are absent from this
// extraction; the symbol index at the end of the page lists `half` and
// `as_real` typedefs at Complex.h:223-224 -- confirm against the original
// header.
221template<> struct unpacket_traits<Packet1cd> {
222 typedef std::complex<double> type;
225 enum {
226 size=1,
228 vectorizable=true,
231 };
232};
233
238{
239 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
240 return Packet1cd(_mm_xor_pd(a.v,mask));
241}
242
244{
245 #ifdef EIGEN_VECTORIZE_SSE3
247 _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
248 vec2d_swizzle1(b.v, 1, 0))));
249 #else
250 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
251 return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
253 vec2d_swizzle1(b.v, 1, 0)), mask)));
254 #endif
255}
256
262
263// FIXME force unaligned load, this is a temporary fix
// Loads one std::complex<double> by viewing it as two consecutive doubles and
// delegating to pload<Packet2d>.
// NOTE(review): the FIXME preceding this function talks about forcing an
// unaligned load, yet the body uses EIGEN_DEBUG_ALIGNED_LOAD and the aligned
// pload<Packet2d> -- confirm which behaviour is intended. Presumably `from`
// must be 16-byte aligned; verify against pload<Packet2d>.
264template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from)
265{ EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
266template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
// Builds a Packet1cd from a single std::complex<double> passed by reference.
// The value is fetched with ploadu (the unaligned load) applied to the address
// of `from`, as the inline comment below explains.
268template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
269{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
270
// Forwards to pset1<Packet1cd>(*from); with one complex value per packet there
// is nothing to duplicate beyond loading that single element.
271template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
272
273// FIXME force unaligned store, this is a temporary fix
// Stores the packet's two doubles to `to` through the Packet2d pstore.
// NOTE(review): the FIXME preceding this function talks about forcing an
// unaligned store, yet the body uses EIGEN_DEBUG_ALIGNED_STORE and the aligned
// pstore -- confirm which behaviour is intended.
274template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
// Unaligned counterpart of pstore: writes the packet's two doubles to `to`
// through the Packet2d pstoreu.
275template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }
276
278
// Extracts the single std::complex<double> held by the packet.
279template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
280{
// _mm_store_pd needs a 16-byte aligned destination, hence EIGEN_ALIGN16.
281 EIGEN_ALIGN16 double res[2];
282 _mm_store_pd(res, a.v);
// Lane 0 is the real part, lane 1 the imaginary part.
283 return std::complex<double>(res[0],res[1]);
284}
285
// Reversing a packet that holds a single complex value (size=1 in
// unpacket_traits<Packet1cd>) leaves it unchanged, so the input is returned.
286template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
287
// Sum-reduction of a one-element packet: the result is that element itself.
288template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
289{
290 return pfirst(a);
291}
292
// Product-reduction of a one-element packet: the result is that element itself.
293template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
294{
295 return pfirst(a);
296}
297
299
301{
302 // TODO optimize it for SSE3 and 4
304 __m128d s = _mm_mul_pd(b.v,b.v);
305 return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
306}
307
// Returns a Packet1cd built from preverse applied to the underlying Packet2d.
// Presumably this swaps the real and imaginary doubles of the complex value --
// preverse<Packet2d> is defined outside this listing; confirm there.
308EIGEN_STRONG_INLINE Packet1cd pcplxflip/* <Packet1cd> */(const Packet1cd& x)
309{
310 return Packet1cd(preverse(Packet2d(x.v)));
311}
312
// In-place transpose of a 2x2 block of complex floats held in two Packet2cf.
// Each std::complex<float> occupies 64 bits, so both packets are reinterpreted
// as pairs of doubles and recombined with the double-precision unpack
// instructions:
//   packet[0] <- { low(packet[0]),  low(packet[1])  }
//   packet[1] <- { high(packet[0]), high(packet[1]) }
313EIGEN_DEVICE_FUNC inline void
314ptranspose(PacketBlock<Packet2cf,2>& kernel) {
// Bit-casts only; no data conversion takes place.
315 __m128d w1 = _mm_castps_pd(kernel.packet[0].v);
316 __m128d w2 = _mm_castps_pd(kernel.packet[1].v);
317
318 __m128 tmp = _mm_castpd_ps(_mm_unpackhi_pd(w1, w2));
319 kernel.packet[0].v = _mm_castpd_ps(_mm_unpacklo_pd(w1, w2));
320 kernel.packet[1].v = tmp;
321}
322
324{
325 __m128 eq = _mm_cmpeq_ps(a.v, b.v);
326 return Packet2cf(pand<Packet4f>(eq, vec4f_swizzle1(eq, 1, 0, 3, 2)));
327}
328
330{
331 __m128d eq = _mm_cmpeq_pd(a.v, b.v);
333}
334
339
343
347
348} // end namespace internal
349} // end namespace Eigen
350
351#endif // EIGEN_COMPLEX_SSE_H
ArrayXXi a
Definition Array_initializer_list_23_cxx11.cpp:1
#define EIGEN_ALIGN16
Definition ConfigureVectorization.h:153
#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL)
Definition ConjHelper.h:14
#define EIGEN_DEBUG_ALIGNED_STORE
Definition GenericPacketMath.h:35
#define EIGEN_DEBUG_ALIGNED_LOAD
Definition GenericPacketMath.h:27
#define EIGEN_DEBUG_UNALIGNED_STORE
Definition GenericPacketMath.h:39
#define EIGEN_DEBUG_UNALIGNED_LOAD
Definition GenericPacketMath.h:31
#define EIGEN_DEVICE_FUNC
Definition Macros.h:976
#define EIGEN_STRONG_INLINE
Definition Macros.h:917
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
Definition PartialRedux_count.cpp:3
#define vec4f_swizzle1(v, p, q, r, s)
Definition PacketMath.h:61
#define vec2d_swizzle1(v, p, q)
Definition PacketMath.h:67
Scalar * b
Definition benchVecAdd.cpp:17
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy x
Definition gnuplot_common_settings.hh:12
@ Aligned16
Definition Constants.h:235
RealScalar s
Definition level1_cplx_impl.h:126
v2f64 Packet2d
Definition PacketMath.h:820
EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf &a)
Definition Complex.h:167
EIGEN_STRONG_INLINE std::complex< float > predux_mul< Packet2cf >(const Packet2cf &a)
Definition Complex.h:199
EIGEN_STRONG_INLINE std::complex< float > predux< Packet2cf >(const Packet2cf &a)
Definition Complex.h:191
EIGEN_STRONG_INLINE Packet2cf psqrt< Packet2cf >(const Packet2cf &a)
Definition Complex.h:244
EIGEN_STRONG_INLINE std::complex< double > predux_mul< Packet1cd >(const Packet1cd &a)
Definition Complex.h:605
EIGEN_STRONG_INLINE Packet2cf ptrue< Packet2cf >(const Packet2cf &a)
Definition Complex.h:98
EIGEN_STRONG_INLINE Packet2cf pandnot< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
Definition Complex.h:172
EIGEN_STRONG_INLINE Packet1cd psqrt< Packet1cd >(const Packet1cd &a)
Definition Complex.h:340
EIGEN_STRONG_INLINE void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
Definition Complex.h:224
EIGEN_STRONG_INLINE Packet2cf ploaddup< Packet2cf >(const std::complex< float > *from)
Definition Complex.h:125
EIGEN_DEVICE_FUNC Packet pdiv(const Packet &a, const Packet &b)
Definition GenericPacketMath.h:244
EIGEN_STRONG_INLINE Packet2cf pmul< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
Definition Complex.h:173
EIGEN_STRONG_INLINE Packet4i pblend(const Selector< 4 > &ifPacket, const Packet4i &thenPacket, const Packet4i &elsePacket)
Definition PacketMath.h:2107
EIGEN_STRONG_INLINE Packet1cd pmul< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
Definition Complex.h:498
EIGEN_STRONG_INLINE std::complex< float > pfirst< Packet2cf >(const Packet2cf &a)
Definition Complex.h:176
EIGEN_STRONG_INLINE Packet1cd ploadu< Packet1cd >(const std::complex< double > *from)
Definition Complex.h:456
EIGEN_STRONG_INLINE Packet2cf por< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
Definition Complex.h:170
EIGEN_STRONG_INLINE Packet2cf pset1< Packet2cf >(const std::complex< float > &from)
Definition Complex.h:112
EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf &a)
Definition Complex.h:184
EIGEN_DEVICE_FUNC Packet pmul(const Packet &a, const Packet &b)
Definition GenericPacketMath.h:237
EIGEN_STRONG_INLINE Packet8h ptrue(const Packet8h &a)
Definition PacketMath.h:978
EIGEN_STRONG_INLINE Packet1cd pcplxflip(const Packet1cd &x)
Definition Complex.h:620
EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf &a)
Definition Complex.h:166
EIGEN_STRONG_INLINE Packet1cd pxor< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
Definition Complex.h:519
EIGEN_STRONG_INLINE Packet1cd padd< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
Definition Complex.h:470
EIGEN_STRONG_INLINE Packet2cf ploadu< Packet2cf >(const std::complex< float > *from)
Definition Complex.h:124
EIGEN_STRONG_INLINE Packet1cd ploaddup< Packet1cd >(const std::complex< double > *from)
Definition Complex.h:533
EIGEN_STRONG_INLINE Packet2cf pload< Packet2cf >(const std::complex< float > *from)
Definition Complex.h:123
EIGEN_STRONG_INLINE bfloat16 pfirst(const Packet8bf &a)
Definition PacketMath.h:1429
EIGEN_STRONG_INLINE std::complex< double > predux< Packet1cd >(const Packet1cd &a)
Definition Complex.h:598
EIGEN_DEVICE_FUNC void pstore(Scalar *to, const Packet &from)
Definition GenericPacketMath.h:696
EIGEN_STRONG_INLINE Packet1cd pload< Packet1cd >(const std::complex< double > *from)
Definition Complex.h:449
EIGEN_STRONG_INLINE Packet1cd pand< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
Definition Complex.h:505
EIGEN_STRONG_INLINE Packet2cf pand< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
Definition Complex.h:169
EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf &a, const Packet2cf &b)
Definition Complex.h:231
EIGEN_STRONG_INLINE Packet1cd pandnot< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
Definition Complex.h:526
EIGEN_DEVICE_FUNC void pstoreu(Scalar *to, const Packet &from)
Definition GenericPacketMath.h:700
EIGEN_STRONG_INLINE Packet2cf pxor< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
Definition Complex.h:171
EIGEN_STRONG_INLINE Packet2cf psub< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
Definition Complex.h:165
EIGEN_STRONG_INLINE Packet1cd ptrue< Packet1cd >(const Packet1cd &a)
Definition Complex.h:257
EIGEN_STRONG_INLINE Packet2cf padd< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
Definition Complex.h:164
EIGEN_STRONG_INLINE Packet1cd por< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
Definition Complex.h:512
__vector float Packet4f
Definition PacketMath.h:30
EIGEN_STRONG_INLINE Packet1cd pset1< Packet1cd >(const std::complex< double > &from)
Definition Complex.h:463
EIGEN_STRONG_INLINE std::complex< double > pfirst< Packet1cd >(const Packet1cd &a)
Definition Complex.h:584
EIGEN_STRONG_INLINE Packet1cd psub< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
Definition Complex.h:477
EIGEN_DEVICE_FUNC internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar)>::type real_ref(const Scalar &x)
Definition MathFunctions.h:1237
Namespace containing all symbols from the Eigen library.
Definition bench_norm.cpp:85
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition Meta.h:74
Definition BandTriangularSolver.h:13
Definition BFloat16.h:88
Definition Complex.h:341
EIGEN_STRONG_INLINE Packet1cd()
Definition Complex.h:188
Packet2d v
Definition Complex.h:410
EIGEN_STRONG_INLINE Packet1cd(const __m128d &a)
Definition Complex.h:189
Definition Complex.h:31
Packet4f v
Definition Complex.h:80
EIGEN_STRONG_INLINE Packet2cf(const __m128 &a)
Definition Complex.h:21
EIGEN_STRONG_INLINE Packet2cf()
Definition Complex.h:20
@ HasBlend
Definition GenericPacketMath.h:60
@ HasSqrt
Definition GenericPacketMath.h:66
@ HasDiv
Definition GenericPacketMath.h:65
@ HasSub
Definition GenericPacketMath.h:118
@ HasMax
Definition GenericPacketMath.h:124
@ HasNegate
Definition GenericPacketMath.h:120
@ HasMul
Definition GenericPacketMath.h:119
@ HasAdd
Definition GenericPacketMath.h:117
@ HasSetLinear
Definition GenericPacketMath.h:126
@ HasMin
Definition GenericPacketMath.h:123
@ HasAbs2
Definition GenericPacketMath.h:122
@ HasAbs
Definition GenericPacketMath.h:121
@ HasHalfPacket
Definition GenericPacketMath.h:114
@ size
Definition GenericPacketMath.h:112
@ AlignedOnScalar
Definition GenericPacketMath.h:113
@ Vectorizable
Definition GenericPacketMath.h:111
Definition ForwardDeclarations.h:17
Packet2d as_real
Definition Complex.h:224
Packet1cd half
Definition Complex.h:223
std::complex< double > type
Definition Complex.h:222
std::complex< float > type
Definition Complex.h:55
Packet4f as_real
Definition Complex.h:57
Packet2cf half
Definition Complex.h:56
@ masked_load_available
Definition GenericPacketMath.h:141
@ size
Definition GenericPacketMath.h:138
@ masked_store_available
Definition GenericPacketMath.h:142
@ vectorizable
Definition GenericPacketMath.h:140
@ alignment
Definition GenericPacketMath.h:139
Definition datatypes.h:12