Add SIMD XOR functionality

This commit is contained in:
yagoda 2017-11-27 11:10:50 +00:00
parent 3ebda40580
commit 38903de07c
7 changed files with 190 additions and 6 deletions

View file

@ -1506,4 +1506,119 @@ static inline simd_s_t srslte_simd_convert_2f_s(simd_f_t a, simd_f_t b) {
#endif /* SRSLTE_SIMD_F_SIZE && SRSLTE_SIMD_C16_SIZE */
#if SRSLTE_SIMD_B_SIZE
/* Data types */
#ifdef LV_HAVE_AVX512
typedef __m512i simd_b_t;
#else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2
typedef __m256i simd_b_t;
#else /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_SSE
typedef __m128i simd_b_t;
#else /* HAVE_NEON */
#ifdef HAVE_NEON
typedef int8x16_t simd_b_t;
#endif /* HAVE_NEON */
#endif /* LV_HAVE_SSE */
#endif /* LV_HAVE_AVX2 */
#endif /* LV_HAVE_AVX512 */
static inline simd_b_t srslte_simd_b_load(int8_t *ptr){
#ifdef LV_HAVE_AVX512
return _mm512_load_si512(ptr);
#else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2
return _mm256_load_si256((__m256i*) ptr);
#else /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_SSE
return _mm_load_si128((__m128i*) ptr);
#else /* LV_HAVE_SSE */
#ifdef HAVE_NEON
return vld1q_s8(ptr);
#endif /* HAVE_NEON */
#endif /* LV_HAVE_SSE */
#endif /* LV_HAVE_AVX2 */
#endif /* LV_HAVE_AVX512 */
}
static inline simd_b_t srslte_simd_b_loadu(int8_t *ptr){
#ifdef LV_HAVE_AVX512
return _mm512_loadu_si512(ptr);
#else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2
return _mm256_loadu_si256((__m256i*) ptr);
#else /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_SSE
return _mm_loadu_si128((__m128i*) ptr);
#else /* LV_HAVE_SSE */
#ifdef HAVE_NEON
return vld1q_s8(ptr);
#endif /* HAVE_NEON */
#endif /* LV_HAVE_SSE */
#endif /* LV_HAVE_AVX2 */
#endif /* LV_HAVE_AVX512 */
}
static inline void srslte_simd_b_store(int8_t *ptr, simd_b_t simdreg) {
#ifdef LV_HAVE_AVX512
_mm512_store_si512(ptr, simdreg);
#else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2
_mm256_store_si256((__m256i*) ptr, simdreg);
#else /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_SSE
_mm_store_si128((__m128i*) ptr, simdreg);
#else /* LV_HAVE_SSE */
#ifdef HAVE_NEON
vst1q_s8( ptr, simdreg);
#endif /* HAVE_NEON */
#endif /* LV_HAVE_SSE */
#endif /* LV_HAVE_AVX2 */
#endif /* LV_HAVE_AVX512 */
}
static inline void srslte_simd_b_storeu(int8_t *ptr, simd_b_t simdreg) {
#ifdef LV_HAVE_AVX512
_mm512_storeu_si512(ptr, simdreg);
#else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2
_mm256_storeu_si256((__m256i*) ptr, simdreg);
#else /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_SSE
_mm_storeu_si128((__m128i*) ptr, simdreg);
#else /* LV_HAVE_SSE */
#ifdef HAVE_NEON
vst1q_s8(ptr, simdreg);
#endif /* HAVE_NEON */
#endif /* LV_HAVE_SSE */
#endif /* LV_HAVE_AVX2 */
#endif /* LV_HAVE_AVX512 */
}
static inline simd_b_t srslte_simd_b_xor(simd_b_t a, simd_b_t b) {
#ifdef LV_HAVE_AVX512
return _mm512_xor_epi32(a, b);
#else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2
return _mm256_xor_si256(a, b);
#else /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_SSE
return _mm_xor_si128 (a, b);
#else /* LV_HAVE_SSE */
#ifdef HAVE_NEON
return veorq_s8(a, b);
#endif /* HAVE_NEON */
#endif /* LV_HAVE_SSE */
#endif /* LV_HAVE_AVX2 */
#endif /* LV_HAVE_AVX512 */
}
#endif /*SRSLTE_SIMD_B_SIZE */
#endif //SRSLTE_SIMD_H_H

View file

@ -53,6 +53,10 @@ extern "C" {
// Exponential moving average
#define SRSLTE_VEC_EMA(data, average, alpha) ((alpha)*(data)+(1-alpha)*(average))
/*logical operations */
SRSLTE_API void srslte_vec_xor_bbb(int8_t *x,int8_t *y,int8_t *z, uint32_t len);
/** Return the sum of all the elements */
SRSLTE_API float srslte_vec_acc_ff(float *x, uint32_t len);
SRSLTE_API cf_t srslte_vec_acc_cc(cf_t *x, uint32_t len);

View file

@ -53,6 +53,10 @@ extern "C" {
#endif /* LV_HAVE_AVX */
#endif /* LV_HAVE_AVX512 */
/*SIMD Logical operations*/
SRSLTE_API void srslte_vec_xor_bbb_simd(int8_t *x, int8_t *y, int8_t *z, int len);
/* SIMD Basic vector math */
SRSLTE_API void srslte_vec_sum_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len);

View file

@ -60,10 +60,8 @@ void srslte_scrambling_c_offset(srslte_sequence_t *s, cf_t *data, int offset, in
}
void scrambling_b(uint8_t *c, uint8_t *data, int len) {
int i;
for (i = 0; i < len; i++) {
data[i] = (data[i] ^ c[i]);
}
srslte_vec_xor_bbb((int8_t*)c,(int8_t*)data,(int8_t*)data,len);
}
void scrambling_b_word(uint8_t *c, uint8_t *data, int len) {

View file

@ -47,8 +47,10 @@ bool verbose = false;
#define MAX_FUNCTIONS (64)
#define MAX_BLOCKS (16)
#define RANDOM_F() ((float)rand())/((float)RAND_MAX)
#define RANDOM_S() ((int16_t)(rand() && 0x800F))
#define RANDOM_B() ((int8_t)(rand() && 0x8008))
#define RANDOM_CF() (RANDOM_F() + _Complex_I*RANDOM_F())
#define TEST_CALL(TEST_CODE) gettimeofday(&start, NULL);\
@ -87,6 +89,29 @@ float squared_error (cf_t a, cf_t b) {
return diff_re*diff_re + diff_im*diff_im;
}
TEST(srslte_vec_xor_bbb,
MALLOC(int8_t, x);
MALLOC(int8_t, y);
MALLOC(int8_t, z);
cf_t gold = 0.0f;
for (int i = 0; i < block_size; i++) {
x[i] = RANDOM_B();
y[i] = RANDOM_B();
}
TEST_CALL(srslte_vec_xor_bbb(x, y, z, block_size))
for (int i = 0; i < block_size; i++) {
gold = x[i] ^ y[i];
mse += cabsf(gold - z[i]);
}
free(x);
free(y);
free(z);
)
TEST(srslte_vec_acc_ff,
MALLOC(float, x);
float z;
@ -613,8 +638,8 @@ TEST(srslte_vec_div_fff,
cf_t gold;
for (int i = 0; i < block_size; i++) {
x[i] = RANDOM_F();
y[i] = RANDOM_F();
x[i] = RANDOM_F() + 0.0001;
y[i] = RANDOM_F()+ 0.0001;
}
TEST_CALL(srslte_vec_div_fff(x, y, z, block_size))
@ -690,6 +715,11 @@ int main(int argc, char **argv) {
for (uint32_t block_size = 1; block_size <= 1024*8; block_size *= 2) {
func_count = 0;
passed[func_count][size_count] = test_srslte_vec_xor_bbb(func_names[func_count], &timmings[func_count][size_count], block_size);
func_count++;
passed[func_count][size_count] = test_srslte_vec_acc_ff(func_names[func_count], &timmings[func_count][size_count], block_size);
func_count++;

View file

@ -37,6 +37,10 @@
void srslte_vec_xor_bbb(int8_t *x,int8_t *y,int8_t *z, uint32_t len) {
srslte_vec_xor_bbb_simd(x, y, z, len);
}
// Used in PRACH detector, AGC and chest_dl for noise averaging
float srslte_vec_acc_ff(float *x, uint32_t len) {
return srslte_vec_acc_ff_simd(x, len);

View file

@ -37,6 +37,35 @@
#include "srslte/phy/utils/simd.h"
void srslte_vec_xor_bbb_simd(int8_t *x, int8_t *y, int8_t *z, int len) {
int i = 0;
#if SRSLTE_SIMD_B_SIZE
if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) {
for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) {
simd_b_t a = srslte_simd_b_load(&x[i]);
simd_b_t b = srslte_simd_b_load(&y[i]);
simd_b_t r = srslte_simd_b_xor(a, b);
srslte_simd_b_store(&z[i], r);
}
} else {
for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) {
simd_b_t a = srslte_simd_b_loadu(&x[i]);
simd_b_t b = srslte_simd_b_loadu(&y[i]);
simd_s_t r = srslte_simd_b_xor(a, b);
srslte_simd_b_storeu(&z[i], r);
}
}
#endif /* SRSLTE_SIMD_B_SIZE */
for(; i < len; i++){
z[i] = x[i] ^ y[i];
}
}
int srslte_vec_dot_prod_sss_simd(int16_t *x, int16_t *y, int len) {
int i = 0;
int result = 0;