25 #ifndef EIGEN_COMPLEX_SSE_H
26 #define EIGEN_COMPLEX_SSE_H
40 template<>
struct packet_traits<std::complex<float> > : default_packet_traits
42 typedef Packet2cf type;
61 template<>
struct unpacket_traits<Packet2cf> {
typedef std::complex<float> type;
enum {size=2}; };
67 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
68 return Packet2cf(_mm_xor_ps(a.v,mask));
72 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
73 return Packet2cf(_mm_xor_ps(a.v,mask));
79 #ifdef EIGEN_VECTORIZE_SSE3
80 return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v),
81 _mm_mul_ps(_mm_movehdup_ps(a.v),
87 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
88 return Packet2cf(_mm_add_ps(_mm_mul_ps(
vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
105 #if EIGEN_GNUC_AT_MOST(4,2)
107 res.v = _mm_loadl_pi(_mm_set1_ps(0.0f), reinterpret_cast<const __m64*>(&from));
109 res.v = _mm_loadl_pi(res.v, (
const __m64*)&from);
111 return Packet2cf(_mm_movelh_ps(res.v,res.v));
119 template<>
EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(
const std::complex<float> * addr) { _mm_prefetch((
const char*)(addr), _MM_HINT_T0); }
123 #if EIGEN_GNUC_AT_MOST(4,3)
127 _mm_store_ps((
float*)res, a.v);
130 std::complex<float> res;
131 _mm_storel_pi((__m64*)&res, a.v);
140 return pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v,a.v))));
145 return Packet2cf(_mm_add_ps(_mm_movelh_ps(vecs[0].v,vecs[1].v), _mm_movehl_ps(vecs[1].v,vecs[0].v)));
150 return pfirst(
pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
154 struct palign_impl<Offset,Packet2cf>
160 first.v = _mm_movehl_ps(first.v, first.v);
161 first.v = _mm_movelh_ps(first.v, second.v);
166 template<>
struct conj_helper<Packet2cf, Packet2cf, false,true>
173 #ifdef EIGEN_VECTORIZE_SSE3
176 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
177 return Packet2cf(_mm_add_ps(_mm_xor_ps(_mm_mul_ps(
vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
184 template<>
struct conj_helper<Packet2cf, Packet2cf, true,false>
191 #ifdef EIGEN_VECTORIZE_SSE3
194 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
195 return Packet2cf(_mm_add_ps(_mm_mul_ps(
vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
202 template<>
struct conj_helper<Packet2cf, Packet2cf, true,true>
209 #ifdef EIGEN_VECTORIZE_SSE3
212 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
213 return Packet2cf(_mm_sub_ps(_mm_xor_ps(_mm_mul_ps(
vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v), mask),
220 template<>
struct conj_helper<
Packet4f, Packet2cf, false,false>
229 template<>
struct conj_helper<Packet2cf,
Packet4f, false,false>
241 Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().
pmul(a,b);
242 __m128 s = _mm_mul_ps(b.v,b.v);
243 return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1)))));
260 template<>
struct packet_traits<std::complex<double> > : default_packet_traits
262 typedef Packet1cd type;
281 template<>
struct unpacket_traits<Packet1cd> {
typedef std::complex<double> type;
enum {size=1}; };
288 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
289 return Packet1cd(_mm_xor_pd(a.v,mask));
295 #ifdef EIGEN_VECTORIZE_SSE3
296 return Packet1cd(_mm_addsub_pd(_mm_mul_pd(
vec2d_swizzle1(a.v, 0, 0), b.v),
300 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
301 return Packet1cd(_mm_add_pd(_mm_mul_pd(
vec2d_swizzle1(a.v, 0, 0), b.v),
326 template<>
EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(
const std::complex<double> * addr) { _mm_prefetch((
const char*)(addr), _MM_HINT_T0); }
331 _mm_store_pd(res, a.v);
332 return std::complex<double>(res[0],res[1]);
353 struct palign_impl<Offset,Packet1cd>
362 template<>
struct conj_helper<Packet1cd, Packet1cd, false,true>
369 #ifdef EIGEN_VECTORIZE_SSE3
372 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
373 return Packet1cd(_mm_add_pd(_mm_xor_pd(_mm_mul_pd(
vec2d_swizzle1(a.v, 0, 0), b.v), mask),
380 template<>
struct conj_helper<Packet1cd, Packet1cd, true,false>
387 #ifdef EIGEN_VECTORIZE_SSE3
390 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
391 return Packet1cd(_mm_add_pd(_mm_mul_pd(
vec2d_swizzle1(a.v, 0, 0), b.v),
398 template<>
struct conj_helper<Packet1cd, Packet1cd, true,true>
405 #ifdef EIGEN_VECTORIZE_SSE3
408 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
409 return Packet1cd(_mm_sub_pd(_mm_xor_pd(_mm_mul_pd(
vec2d_swizzle1(a.v, 0, 0), b.v), mask),
416 template<>
struct conj_helper<
Packet2d, Packet1cd, false,false>
425 template<>
struct conj_helper<Packet1cd,
Packet2d, false,false>
437 Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().
pmul(a,b);
438 __m128d s = _mm_mul_pd(b.v,b.v);
439 return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
451 #endif // EIGEN_COMPLEX_SSE_H