Added int16 to float SIMD vector function

This commit is contained in:
Xavier Arteaga 2018-02-21 14:50:41 +01:00
parent 4df6227f5b
commit ff5ac85c7d
4 changed files with 59 additions and 5 deletions

View File

@ -120,6 +120,8 @@ SRSLTE_API void srslte_vec_abs_square_cf_simd(const cf_t *x, float *z, const int
/* Other Functions */
SRSLTE_API void srslte_vec_lut_sss_simd(const short *x, const unsigned short *lut, short *y, const int len);
SRSLTE_API void srslte_vec_convert_if_simd(const int16_t *x, float *z, const float scale, const int len);
SRSLTE_API void srslte_vec_convert_fi_simd(const float *x, int16_t *z, const float scale, const int len);
SRSLTE_API void srslte_vec_cp_simd(const cf_t *src, cf_t *dst, int len);

View File

@ -452,6 +452,28 @@ TEST(srslte_vec_convert_fi,
free(z);
)
TEST(srslte_vec_convert_if,
MALLOC(int16_t, x);
MALLOC(float, z);
float scale = 1000.0f;
float gold;
float k = 1.0f/scale;
for (int i = 0; i < block_size; i++) {
x[i] = (int16_t) RANDOM_S();
}
TEST_CALL(srslte_vec_convert_if(x, scale, z, block_size))
for (int i = 0; i < block_size; i++) {
gold = ((float)x[i]) * k;
mse += fabsf(gold - z[i]);
}
free(x);
free(z);
)
TEST(srslte_vec_prod_fff,
MALLOC(float, x);
MALLOC(float, y);
@ -753,6 +775,9 @@ int main(int argc, char **argv) {
passed[func_count][size_count] = test_srslte_vec_convert_fi(func_names[func_count], &timmings[func_count][size_count], block_size);
func_count++;
passed[func_count][size_count] = test_srslte_vec_convert_if(func_names[func_count], &timmings[func_count][size_count], block_size);
func_count++;
passed[func_count][size_count] = test_srslte_vec_prod_fff(func_names[func_count], &timmings[func_count][size_count], block_size);
func_count++;

View File

@ -93,10 +93,7 @@ void srslte_vec_sc_prod_ccc(const cf_t *x, const cf_t h, cf_t *z, const uint32_t
// Used in turbo decoder
void srslte_vec_convert_if(const int16_t *x, const float scale, float *z, const uint32_t len) {
int i;
for (i=0;i<len;i++) {
z[i] = ((float) x[i])/scale;
}
srslte_vec_convert_if_simd(x, z, scale, len);
}
void srslte_vec_convert_fi(const float *x, const float scale, int16_t *z, const uint32_t len) {

View File

@ -228,6 +228,36 @@ void srslte_vec_lut_sss_simd(const short *x, const unsigned short *lut, short *y
}
}
void srslte_vec_convert_if_simd(const int16_t *x, float *z, const float scale, const int len) {
int i = 0;
const float gain = 1.0f / scale;
#ifdef LV_HAVE_SSE
__m128 s = _mm_set1_ps(gain);
if (SRSLTE_IS_ALIGNED(z)) {
for (; i < len - 3; i += 4) {
__m64 *ptr = (__m64 *) &x[i];
__m128 fl = _mm_cvtpi16_ps(*ptr);
__m128 v = _mm_mul_ps(fl, s);
_mm_store_ps(&z[i], v);
}
} else {
for (; i < len - 3; i += 4) {
__m64 *ptr = (__m64 *) &x[i];
__m128 fl = _mm_cvtpi16_ps(*ptr);
__m128 v = _mm_mul_ps(fl, s);
_mm_storeu_ps(&z[i], v);
}
}
#endif /* LV_HAVE_SSE */
for (; i < len; i++) {
z[i] = ((float) x[i]) * gain;
}
}
void srslte_vec_convert_fi_simd(const float *x, int16_t *z, const float scale, const int len) {
int i = 0;
@ -1220,4 +1250,4 @@ void srslte_vec_interleave_add_simd(const cf_t *x, const cf_t *y, cf_t *z, const
z[k++] += x[i];
z[k++] += y[i];
}
}
}