Vector Optimized Library of Kernels 3.0.0
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_32fc_s32f_atan2_32f.h
Go to the documentation of this file.
/* -*- c++ -*- */
/*
 * Copyright 2012, 2014 Free Software Foundation, Inc.
 *
 * This file is part of VOLK
 *
 * SPDX-License-Identifier: LGPL-3.0-or-later
 */
9
61#ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a_H
62#define INCLUDED_volk_32fc_s32f_atan2_32f_a_H
63
64#include <inttypes.h>
65#include <math.h>
66#include <stdio.h>
67
68#ifdef LV_HAVE_SSE4_1
69#include <smmintrin.h>
70
71#ifdef LV_HAVE_LIB_SIMDMATH
72#include <simdmath.h>
73#endif /* LV_HAVE_LIB_SIMDMATH */
74
75static inline void volk_32fc_s32f_atan2_32f_a_sse4_1(float* outputVector,
76 const lv_32fc_t* complexVector,
77 const float normalizeFactor,
78 unsigned int num_points)
79{
80 const float* complexVectorPtr = (float*)complexVector;
81 float* outPtr = outputVector;
82
83 unsigned int number = 0;
84 const float invNormalizeFactor = 1.0 / normalizeFactor;
85
86#ifdef LV_HAVE_LIB_SIMDMATH
87 const unsigned int quarterPoints = num_points / 4;
88 __m128 testVector = _mm_set_ps1(2 * M_PI);
89 __m128 correctVector = _mm_set_ps1(M_PI);
90 __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
91 __m128 phase;
92 __m128 complex1, complex2, iValue, qValue;
93 __m128 keepMask;
94
95 for (; number < quarterPoints; number++) {
96 // Load IQ data:
97 complex1 = _mm_load_ps(complexVectorPtr);
98 complexVectorPtr += 4;
99 complex2 = _mm_load_ps(complexVectorPtr);
100 complexVectorPtr += 4;
101 // Deinterleave IQ data:
102 iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2, 0, 2, 0));
103 qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3, 1, 3, 1));
104 // Arctan to get phase:
105 phase = atan2f4(qValue, iValue);
106 // When Q = 0 and I < 0, atan2f4 sucks and returns 2pi vice pi.
107 // Compare to 2pi:
108 keepMask = _mm_cmpneq_ps(phase, testVector);
109 phase = _mm_blendv_ps(correctVector, phase, keepMask);
110 // done with above correction.
111 phase = _mm_mul_ps(phase, vNormalizeFactor);
112 _mm_store_ps((float*)outPtr, phase);
113 outPtr += 4;
114 }
115 number = quarterPoints * 4;
116#endif /* LV_HAVE_LIB_SIMDMATH */
117
118 for (; number < num_points; number++) {
119 const float real = *complexVectorPtr++;
120 const float imag = *complexVectorPtr++;
121 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
122 }
123}
124#endif /* LV_HAVE_SSE4_1 */
125
126
127#ifdef LV_HAVE_SSE
128#include <xmmintrin.h>
129
130#ifdef LV_HAVE_LIB_SIMDMATH
131#include <simdmath.h>
132#endif /* LV_HAVE_LIB_SIMDMATH */
133
134static inline void volk_32fc_s32f_atan2_32f_a_sse(float* outputVector,
135 const lv_32fc_t* complexVector,
136 const float normalizeFactor,
137 unsigned int num_points)
138{
139 const float* complexVectorPtr = (float*)complexVector;
140 float* outPtr = outputVector;
141
142 unsigned int number = 0;
143 const float invNormalizeFactor = 1.0 / normalizeFactor;
144
145#ifdef LV_HAVE_LIB_SIMDMATH
146 const unsigned int quarterPoints = num_points / 4;
147 __m128 testVector = _mm_set_ps1(2 * M_PI);
148 __m128 correctVector = _mm_set_ps1(M_PI);
149 __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
150 __m128 phase;
151 __m128 complex1, complex2, iValue, qValue;
152 __m128 mask;
153 __m128 keepMask;
154
155 for (; number < quarterPoints; number++) {
156 // Load IQ data:
157 complex1 = _mm_load_ps(complexVectorPtr);
158 complexVectorPtr += 4;
159 complex2 = _mm_load_ps(complexVectorPtr);
160 complexVectorPtr += 4;
161 // Deinterleave IQ data:
162 iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2, 0, 2, 0));
163 qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3, 1, 3, 1));
164 // Arctan to get phase:
165 phase = atan2f4(qValue, iValue);
166 // When Q = 0 and I < 0, atan2f4 sucks and returns 2pi vice pi.
167 // Compare to 2pi:
168 keepMask = _mm_cmpneq_ps(phase, testVector);
169 phase = _mm_and_ps(phase, keepMask);
170 mask = _mm_andnot_ps(keepMask, correctVector);
171 phase = _mm_or_ps(phase, mask);
172 // done with above correction.
173 phase = _mm_mul_ps(phase, vNormalizeFactor);
174 _mm_store_ps((float*)outPtr, phase);
175 outPtr += 4;
176 }
177 number = quarterPoints * 4;
178#endif /* LV_HAVE_LIB_SIMDMATH */
179
180 for (; number < num_points; number++) {
181 const float real = *complexVectorPtr++;
182 const float imag = *complexVectorPtr++;
183 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
184 }
185}
186#endif /* LV_HAVE_SSE */
187
188#ifdef LV_HAVE_GENERIC
189
190static inline void volk_32fc_s32f_atan2_32f_generic(float* outputVector,
191 const lv_32fc_t* inputVector,
192 const float normalizeFactor,
193 unsigned int num_points)
194{
195 float* outPtr = outputVector;
196 const float* inPtr = (float*)inputVector;
197 const float invNormalizeFactor = 1.0 / normalizeFactor;
198 unsigned int number;
199 for (number = 0; number < num_points; number++) {
200 const float real = *inPtr++;
201 const float imag = *inPtr++;
202 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
203 }
204}
205#endif /* LV_HAVE_GENERIC */
206
207
208#endif /* INCLUDED_volk_32fc_s32f_atan2_32f_a_H */
float32x4_t __m128
Definition sse2neon.h:235
#define _mm_shuffle_ps(a, b, imm)
Definition sse2neon.h:2586
FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b)
Definition sse2neon.h:2205
FORCE_INLINE __m128 _mm_set_ps1(float)
Definition sse2neon.h:2437
FORCE_INLINE __m128 _mm_andnot_ps(__m128 a, __m128 b)
Definition sse2neon.h:1079
FORCE_INLINE __m128 _mm_and_ps(__m128 a, __m128 b)
Definition sse2neon.h:1064
FORCE_INLINE __m128 _mm_blendv_ps(__m128 _a, __m128 _b, __m128 _mask)
Definition sse2neon.h:7458
FORCE_INLINE __m128 _mm_cmpneq_ps(__m128 a, __m128 b)
Definition sse2neon.h:1205
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0)
Definition sse2neon.h:195
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition sse2neon.h:1858
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition sse2neon.h:2704
FORCE_INLINE __m128 _mm_or_ps(__m128, __m128)
Definition sse2neon.h:2237
static void volk_32fc_s32f_atan2_32f_generic(float *outputVector, const lv_32fc_t *inputVector, const float normalizeFactor, unsigned int num_points)
Definition volk_32fc_s32f_atan2_32f.h:190
static void volk_32fc_s32f_atan2_32f_a_sse(float *outputVector, const lv_32fc_t *complexVector, const float normalizeFactor, unsigned int num_points)
Definition volk_32fc_s32f_atan2_32f.h:134
float complex lv_32fc_t
Definition volk_complex.h:74