Vector Optimized Library of Kernels 3.0.0
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
volk_32fc_s32f_deinterleave_real_16i.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
60#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
61#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
62
63#include <inttypes.h>
64#include <stdio.h>
65#include <volk/volk_common.h>
66
67
68#ifdef LV_HAVE_AVX2
69#include <immintrin.h>
70
71static inline void
72volk_32fc_s32f_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
73 const lv_32fc_t* complexVector,
74 const float scalar,
75 unsigned int num_points)
76{
77 unsigned int number = 0;
78 const unsigned int eighthPoints = num_points / 8;
79
80 const float* complexVectorPtr = (float*)complexVector;
81 int16_t* iBufferPtr = iBuffer;
82
83 __m256 vScalar = _mm256_set1_ps(scalar);
84
85 __m256 cplxValue1, cplxValue2, iValue;
86 __m256i a;
87 __m128i b;
88
89 __m256i idx = _mm256_set_epi32(3, 3, 3, 3, 5, 1, 4, 0);
90
91 for (; number < eighthPoints; number++) {
92 cplxValue1 = _mm256_load_ps(complexVectorPtr);
93 complexVectorPtr += 8;
94
95 cplxValue2 = _mm256_load_ps(complexVectorPtr);
96 complexVectorPtr += 8;
97
98 // Arrange in i1i2i3i4 format
99 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
100
101 iValue = _mm256_mul_ps(iValue, vScalar);
102
103 iValue = _mm256_round_ps(iValue, _MM_FROUND_TO_ZERO);
104 a = _mm256_cvtps_epi32(iValue);
105 a = _mm256_packs_epi32(a, a);
106 a = _mm256_permutevar8x32_epi32(a, idx);
107 b = _mm256_extracti128_si256(a, 0);
108
109 _mm_store_si128((__m128i*)iBufferPtr, b);
110 iBufferPtr += 8;
111 }
112
113 number = eighthPoints * 8;
114 iBufferPtr = &iBuffer[number];
115 for (; number < num_points; number++) {
116 *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
117 complexVectorPtr++;
118 }
119}
120
121
122#endif /* LV_HAVE_AVX2 */
123
124#ifdef LV_HAVE_SSE
125#include <xmmintrin.h>
126
127static inline void
129 const lv_32fc_t* complexVector,
130 const float scalar,
131 unsigned int num_points)
132{
133 unsigned int number = 0;
134 const unsigned int quarterPoints = num_points / 4;
135
136 const float* complexVectorPtr = (float*)complexVector;
137 int16_t* iBufferPtr = iBuffer;
138
139 __m128 vScalar = _mm_set_ps1(scalar);
140
141 __m128 cplxValue1, cplxValue2, iValue;
142
143 __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
144
145 for (; number < quarterPoints; number++) {
146 cplxValue1 = _mm_load_ps(complexVectorPtr);
147 complexVectorPtr += 4;
148
149 cplxValue2 = _mm_load_ps(complexVectorPtr);
150 complexVectorPtr += 4;
151
152 // Arrange in i1i2i3i4 format
153 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
154
155 iValue = _mm_mul_ps(iValue, vScalar);
156
157 _mm_store_ps(floatBuffer, iValue);
158 *iBufferPtr++ = (int16_t)(floatBuffer[0]);
159 *iBufferPtr++ = (int16_t)(floatBuffer[1]);
160 *iBufferPtr++ = (int16_t)(floatBuffer[2]);
161 *iBufferPtr++ = (int16_t)(floatBuffer[3]);
162 }
163
164 number = quarterPoints * 4;
165 iBufferPtr = &iBuffer[number];
166 for (; number < num_points; number++) {
167 *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
168 complexVectorPtr++;
169 }
170}
171
172#endif /* LV_HAVE_SSE */
173
174
175#ifdef LV_HAVE_GENERIC
176
177static inline void
179 const lv_32fc_t* complexVector,
180 const float scalar,
181 unsigned int num_points)
182{
183 const float* complexVectorPtr = (float*)complexVector;
184 int16_t* iBufferPtr = iBuffer;
185 unsigned int number = 0;
186 for (number = 0; number < num_points; number++) {
187 *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
188 complexVectorPtr++;
189 }
190}
191
192#endif /* LV_HAVE_GENERIC */
193
194#endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H */
195
196#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H
197#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H
198
199#include <inttypes.h>
200#include <stdio.h>
201#include <volk/volk_common.h>
202
203#ifdef LV_HAVE_AVX2
204#include <immintrin.h>
205
206static inline void
207volk_32fc_s32f_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
208 const lv_32fc_t* complexVector,
209 const float scalar,
210 unsigned int num_points)
211{
212 unsigned int number = 0;
213 const unsigned int eighthPoints = num_points / 8;
214
215 const float* complexVectorPtr = (float*)complexVector;
216 int16_t* iBufferPtr = iBuffer;
217
218 __m256 vScalar = _mm256_set1_ps(scalar);
219
220 __m256 cplxValue1, cplxValue2, iValue;
221 __m256i a;
222 __m128i b;
223
224 __m256i idx = _mm256_set_epi32(3, 3, 3, 3, 5, 1, 4, 0);
225
226 for (; number < eighthPoints; number++) {
227 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
228 complexVectorPtr += 8;
229
230 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
231 complexVectorPtr += 8;
232
233 // Arrange in i1i2i3i4 format
234 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
235
236 iValue = _mm256_mul_ps(iValue, vScalar);
237
238 iValue = _mm256_round_ps(iValue, _MM_FROUND_TO_ZERO);
239 a = _mm256_cvtps_epi32(iValue);
240 a = _mm256_packs_epi32(a, a);
241 a = _mm256_permutevar8x32_epi32(a, idx);
242 b = _mm256_extracti128_si256(a, 0);
243
244 _mm_storeu_si128((__m128i*)iBufferPtr, b);
245 iBufferPtr += 8;
246 }
247
248 number = eighthPoints * 8;
249 iBufferPtr = &iBuffer[number];
250 for (; number < num_points; number++) {
251 *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
252 complexVectorPtr++;
253 }
254}
255
256#endif /* LV_HAVE_AVX2 */
257
258#endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H */
FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a)
Definition sse2neon.h:5937
float32x4_t __m128
Definition sse2neon.h:235
#define _mm_shuffle_ps(a, b, imm)
Definition sse2neon.h:2586
FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b)
Definition sse2neon.h:2205
FORCE_INLINE __m128 _mm_set_ps1(float)
Definition sse2neon.h:2437
FORCE_INLINE void _mm_storeu_si128(__m128i *p, __m128i a)
Definition sse2neon.h:6010
#define _MM_FROUND_TO_ZERO
Definition sse2neon.h:202
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0)
Definition sse2neon.h:195
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition sse2neon.h:1858
int64x2_t __m128i
Definition sse2neon.h:244
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition sse2neon.h:2704
static void volk_32fc_s32f_deinterleave_real_16i_generic(int16_t *iBuffer, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition volk_32fc_s32f_deinterleave_real_16i.h:178
static void volk_32fc_s32f_deinterleave_real_16i_a_sse(int16_t *iBuffer, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition volk_32fc_s32f_deinterleave_real_16i.h:128
#define __VOLK_ATTR_ALIGNED(x)
Definition volk_common.h:65
float complex lv_32fc_t
Definition volk_complex.h:74