From 681b98ae503e481d72f7b77b9997ad9569e75c83 Mon Sep 17 00:00:00 2001 From: Xavier Arteaga Date: Fri, 25 May 2018 16:05:59 +0200 Subject: [PATCH] Added vector CFO --- CMakeLists.txt | 4 + lib/include/srslte/phy/utils/simd.h | 5 ++ lib/include/srslte/phy/utils/vector.h | 3 + lib/include/srslte/phy/utils/vector_simd.h | 3 + lib/src/phy/sync/cfo.c | 17 ++++- lib/src/phy/utils/test/vector_test.c | 86 +++++++++++++++++++++- lib/src/phy/utils/vector.c | 6 +- lib/src/phy/utils/vector_simd.c | 51 +++++++++++++ 8 files changed, 172 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e30c18d9d..146c061d3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -284,6 +284,10 @@ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") endif(HAVE_AVX) endif (HAVE_AVX2) + if (HAVE_FMA) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma -DLV_HAVE_FMA") + endif (HAVE_FMA) + if (HAVE_AVX512) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512cd -DLV_HAVE_AVX512") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512cd -DLV_HAVE_AVX512") diff --git a/lib/include/srslte/phy/utils/simd.h b/lib/include/srslte/phy/utils/simd.h index e9cb4da30..a7809136e 100644 --- a/lib/include/srslte/phy/utils/simd.h +++ b/lib/include/srslte/phy/utils/simd.h @@ -789,10 +789,15 @@ static inline simd_cf_t srslte_simd_cf_prod (simd_cf_t a, simd_cf_t b) { _mm512_mul_ps(a.im, b.re)); #else /* LV_HAVE_AVX512 */ #ifdef LV_HAVE_AVX2 +#ifdef LV_HAVE_FMA + ret.re = _mm256_fmsub_ps(a.re, b.re, _mm256_mul_ps(a.im, b.im)); + ret.im = _mm256_fmadd_ps(a.re, b.im, _mm256_mul_ps(a.im, b.re)); +#else /* LV_HAVE_FMA */ ret.re = _mm256_sub_ps(_mm256_mul_ps(a.re, b.re), _mm256_mul_ps(a.im, b.im)); ret.im = _mm256_add_ps(_mm256_mul_ps(a.re, b.im), _mm256_mul_ps(a.im, b.re)); +#endif /* LV_HAVE_FMA */ #else #ifdef LV_HAVE_SSE ret.re = _mm_sub_ps(_mm_mul_ps(a.re, b.re), diff --git a/lib/include/srslte/phy/utils/vector.h b/lib/include/srslte/phy/utils/vector.h index d6742fb3c..bf394944f 100644 --- a/lib/include/srslte/phy/utils/vector.h +++ b/lib/include/srslte/phy/utils/vector.h @@ -156,6 +156,9 @@ SRSLTE_API void srslte_vec_interleave(const cf_t *x, const cf_t *y, cf_t *z, con SRSLTE_API void srslte_vec_interleave_add(const cf_t *x, const cf_t *y, cf_t *z, const int len); +SRSLTE_API void srslte_vec_apply_cfo(const cf_t *x, float cfo, cf_t *z, int len); + + #ifdef __cplusplus } #endif diff --git a/lib/include/srslte/phy/utils/vector_simd.h b/lib/include/srslte/phy/utils/vector_simd.h index 11c6ce1cd..fae19aa02 100644 --- a/lib/include/srslte/phy/utils/vector_simd.h +++ b/lib/include/srslte/phy/utils/vector_simd.h @@ -130,6 +130,9 @@ SRSLTE_API void srslte_vec_interleave_simd(const cf_t *x, const cf_t *y, cf_t *z SRSLTE_API void srslte_vec_interleave_add_simd(const cf_t *x, const cf_t *y, cf_t *z, const int len); +SRSLTE_API void srslte_vec_apply_cfo_simd(const cf_t *x, float cfo, cf_t *z, int len); + + /* SIMD Find Max functions */ SRSLTE_API uint32_t srslte_vec_max_fi_simd(const float *x, const int len); diff --git a/lib/src/phy/sync/cfo.c b/lib/src/phy/sync/cfo.c index 806701a9a..a1529be13 100644 --- a/lib/src/phy/sync/cfo.c +++ b/lib/src/phy/sync/cfo.c @@ -34,7 +34,11 @@ #include "srslte/phy/utils/vector.h" #include "srslte/phy/utils/debug.h" +/* Set next macro to 1 for using table generated CFO compensation */ +#define SRSLTE_CFO_USE_EXP_TABLE 0 + int srslte_cfo_init(srslte_cfo_t *h, uint32_t nsamples) { +#if SRSLTE_CFO_USE_EXP_TABLE int ret = SRSLTE_ERROR; bzero(h, sizeof(srslte_cfo_t)); @@ -57,13 +61,19 @@ clean: srslte_cfo_free(h); } return ret; +#else /* SRSLTE_CFO_USE_EXP_TABLE */ + h->nsamples = nsamples; + return SRSLTE_SUCCESS; +#endif /* SRSLTE_CFO_USE_EXP_TABLE */ } void srslte_cfo_free(srslte_cfo_t *h) { +#if SRSLTE_CFO_USE_EXP_TABLE srslte_cexptab_free(&h->tab); if (h->cur_cexp) { free(h->cur_cexp); } +#endif /* SRSLTE_CFO_USE_EXP_TABLE */ bzero(h, sizeof(srslte_cfo_t)); } @@ -72,6 +82,7 @@ void srslte_cfo_set_tol(srslte_cfo_t *h, float tol) { } int srslte_cfo_resize(srslte_cfo_t *h, uint32_t samples) { +#if SRSLTE_CFO_USE_EXP_TABLE if (samples <= h->max_samples) { srslte_cexptab_gen(&h->tab, h->cur_cexp, h->last_freq, samples); h->nsamples = samples; @@ -79,15 +90,19 @@ int srslte_cfo_resize(srslte_cfo_t *h, uint32_t samples) { fprintf(stderr, "Error in cfo_resize(): nof_samples must be lower than initialized\n"); return SRSLTE_ERROR; } - +#endif /* SRSLTE_CFO_USE_EXP_TABLE */ return SRSLTE_SUCCESS; } void srslte_cfo_correct(srslte_cfo_t *h, const cf_t *input, cf_t *output, float freq) { +#if SRSLTE_CFO_USE_EXP_TABLE if (fabs(h->last_freq - freq) > h->tol) { h->last_freq = freq; srslte_cexptab_gen(&h->tab, h->cur_cexp, h->last_freq, h->nsamples); DEBUG("CFO generating new table for frequency %.4fe-6\n", freq*1e6); } srslte_vec_prod_ccc(h->cur_cexp, input, output, h->nsamples); +#else /* SRSLTE_CFO_USE_EXP_TABLE */ + srslte_vec_apply_cfo(input, freq, output, h->nsamples); +#endif /* SRSLTE_CFO_USE_EXP_TABLE */ } diff --git a/lib/src/phy/utils/test/vector_test.c b/lib/src/phy/utils/test/vector_test.c index 623fdb526..30f80c07d 100644 --- a/lib/src/phy/utils/test/vector_test.c +++ b/lib/src/phy/utils/test/vector_test.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "srslte/phy/utils/mat.h" #include "srslte/phy/utils/simd.h" @@ -729,6 +730,80 @@ TEST(srslte_vec_max_abs_ci, free(x); ) +TEST(srslte_vec_apply_cfo, + MALLOC(cf_t, x); + MALLOC(cf_t, z); + + const float cfo = 0.1f; + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_apply_cfo(x, cfo, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * cexpf(_Complex_I * 2.0f * (float) M_PI * i * cfo); + mse += cabsf(gold - z[i]) / cabsf(gold); + } + mse /= block_size; + + free(x); + free(z); +) + +TEST(srslte_cfo_correct, + srslte_cfo_t srslte_cfo = {0}; + MALLOC(cf_t, x); + MALLOC(cf_t, z); + + const float cfo = 0.1f; + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + } + + srslte_cfo_init(&srslte_cfo, block_size); + + TEST_CALL(srslte_cfo_correct(&srslte_cfo, x, z, cfo)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * cexpf(_Complex_I * 2.0f * (float) M_PI * i * cfo); + mse += cabsf(gold - z[i]) / cabsf(gold); + } + mse /= block_size; + + free(x); + free(z); + srslte_cfo_free(&srslte_cfo); +) + +TEST(srslte_cfo_correct_change, + srslte_cfo_t srslte_cfo = {0}; + MALLOC(cf_t, x); + MALLOC(cf_t, z); + + float cfo = 0.1f; + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + } + + srslte_cfo_init(&srslte_cfo, block_size); + + TEST_CALL(cfo = (i%2)?0.1:-0.1; srslte_cfo_correct(&srslte_cfo, x, z, cfo)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * cexpf(_Complex_I * 2.0f * (float) M_PI * i * cfo); + mse += cabsf(gold - z[i]) / cabsf(gold); + } + mse /= block_size; + + free(x); + free(z); + srslte_cfo_free(&srslte_cfo); +) + int main(int argc, char **argv) { char func_names[MAX_FUNCTIONS][32]; double timmings[MAX_FUNCTIONS][MAX_BLOCKS]; @@ -738,7 +813,7 @@ int main(int argc, char **argv) { bool passed[MAX_FUNCTIONS][MAX_BLOCKS]; bool all_passed = true; - for (uint32_t block_size = 1; block_size <= 1024*8; block_size *= 2) { + for (uint32_t block_size = 1; block_size <= 1024*32; block_size *= 2) { func_count = 0; @@ -827,6 +902,15 @@ int main(int argc, char **argv) { passed[func_count][size_count] = test_srslte_vec_max_abs_ci(func_names[func_count], &timmings[func_count][size_count], block_size); func_count++; + passed[func_count][size_count] = test_srslte_vec_apply_cfo(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_cfo_correct(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_cfo_correct_change(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + sizes[size_count] = block_size; size_count++; } diff --git a/lib/src/phy/utils/vector.c b/lib/src/phy/utils/vector.c index 6c55ddefb..79e6f92b8 100644 --- a/lib/src/phy/utils/vector.c +++ b/lib/src/phy/utils/vector.c @@ -437,4 +437,8 @@ void srslte_vec_interleave(const cf_t *x, const cf_t *y, cf_t *z, const int len) void srslte_vec_interleave_add(const cf_t *x, const cf_t *y, cf_t *z, const int len) { srslte_vec_interleave_add_simd(x, y, z, len); -} \ No newline at end of file +} + +void srslte_vec_apply_cfo(const cf_t *x, float cfo, cf_t *z, int len) { + srslte_vec_apply_cfo_simd(x, cfo, z, len); +} diff --git a/lib/src/phy/utils/vector_simd.c b/lib/src/phy/utils/vector_simd.c index 732e8eea2..2835a58cf 100644 --- a/lib/src/phy/utils/vector_simd.c +++ b/lib/src/phy/utils/vector_simd.c @@ -1251,3 +1251,54 @@ void srslte_vec_interleave_add_simd(const cf_t *x, const cf_t *y, cf_t *z, const z[k++] += y[i]; } } + +void srslte_vec_apply_cfo_simd(const cf_t *x, float cfo, cf_t *z, int len) { + const float TWOPI = 2.0f * (float) M_PI; + int i = 0; + +#if SRSLTE_SIMD_CF_SIZE + __attribute__ ((aligned (SRSLTE_SIMD_BIT_ALIGN/8))) cf_t _osc[SRSLTE_SIMD_CF_SIZE]; + __attribute__ ((aligned (SRSLTE_SIMD_BIT_ALIGN/8))) cf_t _phase[SRSLTE_SIMD_CF_SIZE]; + + if (i < len - SRSLTE_SIMD_CF_SIZE + 1) { + for (int k = 0; k < SRSLTE_SIMD_CF_SIZE; k++) { + _osc[k] = cexpf(_Complex_I * TWOPI * cfo * SRSLTE_SIMD_CF_SIZE); + _phase[k] = cexpf(_Complex_I * TWOPI * cfo * k); + } + } + simd_cf_t _simd_osc = srslte_simd_cfi_load(_osc); + simd_cf_t _simd_phase = srslte_simd_cfi_load(_phase); + + if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(z)) { + for (; i < len - SRSLTE_SIMD_CF_SIZE + 1; i += SRSLTE_SIMD_CF_SIZE) { + simd_cf_t a = srslte_simd_cfi_load(&x[i]); + + simd_cf_t r = srslte_simd_cf_prod(a, _simd_phase); + + srslte_simd_cfi_store(&z[i], r); + + _simd_phase = srslte_simd_cf_prod(_simd_phase, _simd_osc); + + } + } else { + for (; i < len - SRSLTE_SIMD_F_SIZE + 1; i += SRSLTE_SIMD_F_SIZE) { + for (; i < len - SRSLTE_SIMD_CF_SIZE + 1; i += SRSLTE_SIMD_CF_SIZE) { + simd_cf_t a = srslte_simd_cfi_loadu(&x[i]); + + simd_cf_t r = srslte_simd_cf_prod(a, _simd_phase); + _simd_phase = srslte_simd_cf_prod(_simd_phase, _simd_osc); + + srslte_simd_cfi_storeu(&z[i], r); + } + } + } +#endif + cf_t osc = cexpf(_Complex_I * TWOPI * cfo); + cf_t phase = cexpf(_Complex_I * TWOPI * cfo * i); + for (; i < len; i++) { + z[i] = x[i] * phase; + + phase *= osc; + } +} +