fixed some issues with AVX machines

This commit is contained in:
Ismael Gomez 2017-06-09 13:03:47 +02:00
parent 3cbf403c54
commit 215dac6662
8 changed files with 44 additions and 38 deletions

View File

@ -37,19 +37,19 @@ extern "C" {
SRSLTE_API int srslte_vec_dot_prod_sss_sse(short *x, short *y, uint32_t len);
SRSLTE_API int srslte_vec_dot_prod_sss_avx(short *x, short *y, uint32_t len);
SRSLTE_API int srslte_vec_dot_prod_sss_avx2(short *x, short *y, uint32_t len);
SRSLTE_API void srslte_vec_sum_sss_sse(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sum_sss_avx(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sum_sss_avx2(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sub_sss_sse(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sub_sss_avx(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sub_sss_avx2(short *x, short *y, short *z, uint32_t len);
@ -57,12 +57,12 @@ SRSLTE_API void srslte_vec_sub_sss_avx(short *x, short *y, short *z, uint32_t le
SRSLTE_API void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_prod_sss_avx(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_prod_sss_avx2(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_div2_sss_sse(short *x, int n_rightshift, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_div2_sss_avx(short *x, int k, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_div2_sss_avx2(short *x, int k, short *z, uint32_t len);

View File

@ -122,7 +122,7 @@ void free37_sse(void *o) {
#ifdef LV_HAVE_AVX
#ifdef LV_HAVE_AVX2
int decode37_avx2(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length) {
srslte_viterbi_t *q = o;
@ -333,7 +333,7 @@ int init37_neon(srslte_viterbi_t *q, int poly[3], uint32_t framebits, bool tail_
#endif
#ifdef LV_HAVE_AVX
#ifdef LV_HAVE_AVX2
int init37_avx2(srslte_viterbi_t *q, int poly[3], uint32_t framebits, bool tail_biting) {
q->K = 7;
q->R = 3;
@ -383,7 +383,7 @@ int srslte_viterbi_init(srslte_viterbi_t *q, srslte_viterbi_type_t type, int pol
switch (type) {
case SRSLTE_VITERBI_37:
#ifdef LV_HAVE_SSE
#ifdef LV_HAVE_AVX
#ifdef LV_HAVE_AVX2
return init37_avx2(q, poly, max_frame_length, tail_bitting);
#else
return init37_sse(q, poly, max_frame_length, tail_bitting);
@ -408,7 +408,7 @@ int srslte_viterbi_init_sse(srslte_viterbi_t *q, srslte_viterbi_type_t type, int
}
#endif
#ifdef LV_HAVE_AVX
#ifdef LV_HAVE_AVX2
int srslte_viterbi_init_avx2(srslte_viterbi_t *q, srslte_viterbi_type_t type, int poly[3], uint32_t max_frame_length, bool tail_bitting)
{
return init37_avx2(q, poly, max_frame_length, tail_bitting);

View File

@ -14,7 +14,7 @@
//#define DEBUG
#ifdef LV_HAVE_SSE
#ifdef LV_HAVE_AVX2
#include <emmintrin.h>
#include <tmmintrin.h>

View File

@ -450,11 +450,11 @@ int srslte_pdsch_decode_multi(srslte_pdsch_t *q,
if (SRSLTE_VERBOSE_ISDEBUG()) {
DEBUG("SAVED FILE subframe.dat: received subframe symbols\n",0);
srslte_vec_save_file("subframe.dat", sf_symbols, SRSLTE_SF_LEN_RE(q->cell.nof_prb, q->cell.cp)*sizeof(cf_t));
srslte_vec_save_file("subframe.dat", sf_symbols[0], SRSLTE_SF_LEN_RE(q->cell.nof_prb, q->cell.cp)*sizeof(cf_t));
DEBUG("SAVED FILE hest0.dat and hest1.dat: channel estimates for port 0 and port 1\n",0);
srslte_vec_save_file("hest0.dat", ce[0], SRSLTE_SF_LEN_RE(q->cell.nof_prb, q->cell.cp)*sizeof(cf_t));
srslte_vec_save_file("hest0.dat", ce[0][0], SRSLTE_SF_LEN_RE(q->cell.nof_prb, q->cell.cp)*sizeof(cf_t));
if (q->cell.nof_ports > 1) {
srslte_vec_save_file("hest1.dat", ce[1], SRSLTE_SF_LEN_RE(q->cell.nof_prb, q->cell.cp)*sizeof(cf_t));
srslte_vec_save_file("hest1.dat", ce[1][0], SRSLTE_SF_LEN_RE(q->cell.nof_prb, q->cell.cp)*sizeof(cf_t));
}
DEBUG("SAVED FILE pdsch_symbols.dat: symbols after equalization\n",0);
srslte_vec_save_file("pdsch_symbols.dat", q->d, cfg->nbits.nof_re*sizeof(cf_t));

View File

@ -202,10 +202,12 @@ int main(int argc, char **argv) {
fprintf(stderr, "Error initiating soft buffer\n");
goto quit;
}
srslte_softbuffer_tx_reset(&softbuffer_tx);
if (srslte_softbuffer_rx_init(&softbuffer_rx, 100)) {
fprintf(stderr, "Error initiating soft buffer\n");
goto quit;
}
srslte_softbuffer_rx_reset(&softbuffer_rx);
uint32_t ntrials = 100;

View File

@ -110,8 +110,8 @@ void srslte_vec_sub_fff(float *x, float *y, float *z, uint32_t len) {
}
void srslte_vec_sub_sss(short *x, short *y, short *z, uint32_t len) {
#ifdef LV_HAVE_AVX
srslte_vec_sub_sss_avx(x, y, z, len);
#ifdef LV_HAVE_AVX2
srslte_vec_sub_sss_avx2(x, y, z, len);
#else
#ifdef LV_HAVE_SSE
srslte_vec_sub_sss_sse(x, y, z, len);
@ -140,8 +140,8 @@ void srslte_vec_sum_fff(float *x, float *y, float *z, uint32_t len) {
}
void srslte_vec_sum_sss(short *x, short *y, short *z, uint32_t len) {
#ifdef LV_HAVE_AVX
srslte_vec_sum_sss_avx(x, y, z, len);
#ifdef LV_HAVE_AVX2
srslte_vec_sum_sss_avx2(x, y, z, len);
#else
#ifdef LV_HAVE_SSE
srslte_vec_sum_sss_sse(x, y, z, len);
@ -212,8 +212,8 @@ void srslte_vec_sc_prod_sfs(short *x, float h, short *z, uint32_t len) {
}
void srslte_vec_sc_div2_sss(short *x, int n_rightshift, short *z, uint32_t len) {
#ifdef LV_HAVE_AVX
srslte_vec_sc_div2_sss_avx(x, n_rightshift, z, len);
#ifdef LV_HAVE_AVX2
srslte_vec_sc_div2_sss_avx2(x, n_rightshift, z, len);
#else
#ifdef LV_HAVE_SSE
srslte_vec_sc_div2_sss_sse(x, n_rightshift, z, len);
@ -345,14 +345,14 @@ void srslte_vec_deinterleave_real_cf(cf_t *x, float *real, uint32_t len) {
#endif
}
/* Note: We align memory to 32 bytes (for AVX compatibility)
/* Note: We align memory to 256 bytes (a multiple of the 32 bytes AVX2 needs)
* because in some cases volk can incorrectly detect the architecture.
* This could be inefficient for SSE or non-SIMD platforms but shouldn't
* be a huge problem.
*/
void *srslte_vec_malloc(uint32_t size) {
void *ptr;
if (posix_memalign(&ptr,32,size)) {
if (posix_memalign(&ptr,256,size)) {
return NULL;
} else {
return ptr;
@ -364,7 +364,7 @@ void *srslte_vec_realloc(void *ptr, uint32_t old_size, uint32_t new_size) {
return realloc(ptr, new_size);
#else
void *new_ptr;
if (posix_memalign(&new_ptr,volk_get_alignment(),new_size)) {
if (posix_memalign(&new_ptr,256,new_size)) {
return NULL;
} else {
memcpy(new_ptr, ptr, old_size);
@ -520,8 +520,8 @@ void srslte_vec_prod_fff(float *x, float *y, float *z, uint32_t len) {
}
void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len) {
#ifdef LV_HAVE_AVX
srslte_vec_prod_sss_avx(x,y,z,len);
#ifdef LV_HAVE_AVX2
srslte_vec_prod_sss_avx2(x,y,z,len);
#else
#ifdef LV_HAVE_SSE
srslte_vec_prod_sss_sse(x,y,z,len);
@ -661,8 +661,8 @@ float srslte_vec_dot_prod_fff(float *x, float *y, uint32_t len) {
}
int32_t srslte_vec_dot_prod_sss(int16_t *x, int16_t *y, uint32_t len) {
#ifdef LV_HAVE_AVX
return srslte_vec_dot_prod_sss_avx(x, y, len);
#ifdef LV_HAVE_AVX2
return srslte_vec_dot_prod_sss_avx2(x, y, len);
#else
#ifdef LV_HAVE_SSE
return srslte_vec_dot_prod_sss_sse(x, y, len);

View File

@ -87,10 +87,10 @@ int srslte_vec_dot_prod_sss_sse(short *x, short *y, uint32_t len)
}
int srslte_vec_dot_prod_sss_avx(short *x, short *y, uint32_t len)
int srslte_vec_dot_prod_sss_avx2(short *x, short *y, uint32_t len)
{
int result = 0;
#ifdef LV_HAVE_AVX
#ifdef LV_HAVE_AVX2
unsigned int number = 0;
const unsigned int points = len / 16;
@ -160,9 +160,9 @@ void srslte_vec_sum_sss_sse(short *x, short *y, short *z, uint32_t len)
}
void srslte_vec_sum_sss_avx(short *x, short *y, short *z, uint32_t len)
void srslte_vec_sum_sss_avx2(short *x, short *y, short *z, uint32_t len)
{
#ifdef LV_HAVE_SSE
#ifdef LV_HAVE_AVX2
unsigned int number = 0;
const unsigned int points = len / 16;
@ -225,9 +225,9 @@ void srslte_vec_sub_sss_sse(short *x, short *y, short *z, uint32_t len)
#endif
}
void srslte_vec_sub_sss_avx(short *x, short *y, short *z, uint32_t len)
void srslte_vec_sub_sss_avx2(short *x, short *y, short *z, uint32_t len)
{
#ifdef LV_HAVE_AVX
#ifdef LV_HAVE_AVX2
unsigned int number = 0;
const unsigned int points = len / 16;
@ -292,9 +292,9 @@ void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len)
#endif
}
void srslte_vec_prod_sss_avx(short *x, short *y, short *z, uint32_t len)
void srslte_vec_prod_sss_avx2(short *x, short *y, short *z, uint32_t len)
{
#ifdef LV_HAVE_SSE
#ifdef LV_HAVE_AVX2
unsigned int number = 0;
const unsigned int points = len / 16;
@ -359,9 +359,9 @@ void srslte_vec_sc_div2_sss_sse(short *x, int k, short *z, uint32_t len)
#endif
}
void srslte_vec_sc_div2_sss_avx(short *x, int k, short *z, uint32_t len)
void srslte_vec_sc_div2_sss_avx2(short *x, int k, short *z, uint32_t len)
{
#ifdef LV_HAVE_AVX
#ifdef LV_HAVE_AVX2
unsigned int number = 0;
const unsigned int points = len / 16;
@ -394,7 +394,11 @@ void srslte_vec_sc_div2_sss_avx(short *x, int k, short *z, uint32_t len)
/* No improvement with AVX */
void srslte_vec_lut_sss_sse(short *x, unsigned short *lut, short *y, uint32_t len)
{
#ifndef DEBUG_MODE
#ifdef DEBUG_MODE
for (int i=0;i<len;i++) {
y[lut[i]] = x[i];
}
#else
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int points = len / 8;

View File

@ -46,7 +46,7 @@ endif (RPATH)
########################################################################
# Optional post-build hook: when BUILDUE_CMD is non-empty, report it and attach
# it as a POST_BUILD command; otherwise only report that none was defined.
if (NOT ${BUILDUE_CMD} STREQUAL "")
message(STATUS "Added custom post-build-UE command: ${BUILDUE_CMD}")
# NOTE(review): two targets are listed here (`ue` and `srsue`). This looks like
# the before/after pair of a single changed line in a diff view — confirm which
# target actually exists before relying on both.
add_custom_command(TARGET ue POST_BUILD COMMAND ${BUILDUE_CMD})
add_custom_command(TARGET srsue POST_BUILD COMMAND ${BUILDUE_CMD})
else(NOT ${BUILDUE_CMD} STREQUAL "")
message(STATUS "No post-build-UE command defined")
endif (NOT ${BUILDUE_CMD} STREQUAL "")