New optimization on the PHY for both UE and eNodeB (#251)
* New parallel Turbodecoder implementation in SSE/AVX 16-bit and 8-bit * Optimised UL Interleaver * Include TB CRC calculation in FEC encoder * New threading priorities
This commit is contained in:
parent
695990f297
commit
bc9d342959
|
@ -279,8 +279,8 @@ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
|||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ggdb -DBUILD_TYPE_RELWITHDEBINFO")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb -DBUILD_TYPE_RELWITHDEBINFO")
|
||||
else(${CMAKE_BUILD_TYPE} STREQUAL "RelWithDebInfo")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -DBUILD_TYPE_RELEASE")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -DBUILD_TYPE_RELEASE")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -fno-trapping-math -fno-math-errno -DBUILD_TYPE_RELEASE")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fno-trapping-math -fno-math-errno -DBUILD_TYPE_RELEASE")
|
||||
endif(${CMAKE_BUILD_TYPE} STREQUAL "RelWithDebInfo")
|
||||
endif(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
|
||||
|
||||
|
@ -325,9 +325,9 @@ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
|||
endif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm")
|
||||
set(CMAKE_REQUIRED_FLAGS ${CMAKE_C_FLAGS})
|
||||
|
||||
if(NOT HAVE_SSE AND NOT HAVE_NEON AND NOT DISABLE_SIMD)
|
||||
message(FATAL_ERROR "no SIMD instructions found")
|
||||
endif(NOT HAVE_SSE AND NOT HAVE_NEON AND NOT DISABLE_SIMD)
|
||||
if(NOT HAVE_SSE AND NOT HAVE_NEON AND NOT DISABLE_SIMD)
|
||||
message(FATAL_ERROR "no SIMD instructions found")
|
||||
endif(NOT HAVE_SSE AND NOT HAVE_NEON AND NOT DISABLE_SIMD)
|
||||
|
||||
if(NOT WIN32)
|
||||
ADD_CXX_COMPILER_FLAG_IF_AVAILABLE(-fvisibility=hidden HAVE_VISIBILITY_HIDDEN)
|
||||
|
|
|
@ -52,6 +52,14 @@
|
|||
#endif
|
||||
|
||||
|
||||
// Useful macros for templates
|
||||
#define CONCAT(a, b) a##b
|
||||
#define CONCAT2(a, b) CONCAT(a,b)
|
||||
|
||||
#define STRING2(x) #x
|
||||
#define STRING(x) STRING2(x)
|
||||
|
||||
|
||||
// Common error codes
|
||||
#define SRSLTE_SUCCESS 0
|
||||
#define SRSLTE_ERROR -1
|
||||
|
|
|
@ -545,6 +545,7 @@ typedef struct {
|
|||
bool sic_pss_enabled;
|
||||
float rx_gain_offset;
|
||||
bool pdsch_csi_enabled;
|
||||
bool pdsch_8bit_decoder;
|
||||
uint32_t intra_freq_meas_len_ms;
|
||||
uint32_t intra_freq_meas_period_ms;
|
||||
} phy_args_t;
|
||||
|
|
|
@ -44,6 +44,7 @@ typedef struct SRSLTE_API {
|
|||
uint8_t *c_bytes;
|
||||
float *c_float;
|
||||
short *c_short;
|
||||
int8_t *c_char;
|
||||
uint32_t cur_len;
|
||||
uint32_t max_len;
|
||||
} srslte_sequence_t;
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#define SRSLTE_RM_TURBO_H
|
||||
|
||||
#include "srslte/config.h"
|
||||
#include "srslte/phy/fec/turbodecoder.h"
|
||||
|
||||
#ifndef SRSLTE_RX_NULL
|
||||
#define SRSLTE_RX_NULL 10000
|
||||
|
@ -47,7 +48,6 @@
|
|||
|
||||
#include "srslte/config.h"
|
||||
|
||||
|
||||
SRSLTE_API int srslte_rm_turbo_tx(uint8_t *w_buff,
|
||||
uint32_t buff_len,
|
||||
uint8_t *input,
|
||||
|
@ -82,7 +82,19 @@ SRSLTE_API int srslte_rm_turbo_rx_lut(int16_t *input,
|
|||
int16_t *output,
|
||||
uint32_t in_len,
|
||||
uint32_t cb_idx,
|
||||
uint32_t rv_idx);
|
||||
uint32_t rv_idx);
|
||||
|
||||
SRSLTE_API int srslte_rm_turbo_rx_lut_(int16_t *input,
|
||||
int16_t *output,
|
||||
uint32_t in_len,
|
||||
uint32_t cb_idx,
|
||||
uint32_t rv_idx,
|
||||
bool enable_input_tdec);
|
||||
|
||||
SRSLTE_API int srslte_rm_turbo_rx_lut_8bit(int8_t *input,
|
||||
int8_t *output,
|
||||
uint32_t in_len,
|
||||
uint32_t cb_idx,
|
||||
uint32_t rv_idx);
|
||||
|
||||
#endif // SRSLTE_RM_TURBO_H
|
||||
|
|
|
@ -1,58 +1,59 @@
|
|||
/**
|
||||
*
|
||||
* \section COPYRIGHT
|
||||
*
|
||||
* Copyright 2013-2015 Software Radio Systems Limited
|
||||
*
|
||||
* \section LICENSE
|
||||
*
|
||||
* This file is part of the srsLTE library.
|
||||
*
|
||||
* srsLTE is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* srsLTE is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* A copy of the GNU Affero General Public License can be found in
|
||||
* the LICENSE file in the top-level directory of this distribution
|
||||
* and at http://www.gnu.org/licenses/.
|
||||
*
|
||||
*/
|
||||
|
||||
/**********************************************************************************************
|
||||
* File: tc_interl.h
|
||||
*
|
||||
* Description: Turbo code interleaver.
|
||||
*
|
||||
* Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2.3
|
||||
*********************************************************************************************/
|
||||
|
||||
#ifndef SRSLTE_TC_INTERL_H
|
||||
#define SRSLTE_TC_INTERL_H
|
||||
|
||||
#include "srslte/config.h"
|
||||
#include <stdint.h>
|
||||
|
||||
typedef struct SRSLTE_API {
|
||||
uint16_t *forward;
|
||||
uint16_t *reverse;
|
||||
uint32_t max_long_cb;
|
||||
} srslte_tc_interl_t;
|
||||
|
||||
SRSLTE_API int srslte_tc_interl_LTE_gen(srslte_tc_interl_t *h,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API int srslte_tc_interl_UMTS_gen(srslte_tc_interl_t *h,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API int srslte_tc_interl_init(srslte_tc_interl_t *h,
|
||||
uint32_t max_long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tc_interl_free(srslte_tc_interl_t *h);
|
||||
|
||||
#endif // SRSLTE_TC_INTERL_H
|
||||
/**
|
||||
*
|
||||
* \section COPYRIGHT
|
||||
*
|
||||
* Copyright 2013-2015 Software Radio Systems Limited
|
||||
*
|
||||
* \section LICENSE
|
||||
*
|
||||
* This file is part of the srsLTE library.
|
||||
*
|
||||
* srsLTE is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* srsLTE is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* A copy of the GNU Affero General Public License can be found in
|
||||
* the LICENSE file in the top-level directory of this distribution
|
||||
* and at http://www.gnu.org/licenses/.
|
||||
*
|
||||
*/
|
||||
|
||||
/**********************************************************************************************
|
||||
* File: tc_interl.h
|
||||
*
|
||||
* Description: Turbo code interleaver.
|
||||
*
|
||||
* Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2.3
|
||||
*********************************************************************************************/
|
||||
|
||||
#ifndef SRSLTE_TC_INTERL_H
|
||||
#define SRSLTE_TC_INTERL_H
|
||||
|
||||
#include "srslte/config.h"
|
||||
#include <stdint.h>
|
||||
|
||||
typedef struct SRSLTE_API {
|
||||
uint16_t *forward;
|
||||
uint16_t *reverse;
|
||||
uint32_t max_long_cb;
|
||||
} srslte_tc_interl_t;
|
||||
|
||||
SRSLTE_API int srslte_tc_interl_LTE_gen(srslte_tc_interl_t *h,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API int srslte_tc_interl_LTE_gen_interl(srslte_tc_interl_t *h,
|
||||
uint32_t long_cb,
|
||||
uint32_t interl_win);
|
||||
|
||||
SRSLTE_API int srslte_tc_interl_init(srslte_tc_interl_t *h,
|
||||
uint32_t max_long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tc_interl_free(srslte_tc_interl_t *h);
|
||||
|
||||
#endif // SRSLTE_TC_INTERL_H
|
||||
|
|
|
@ -70,10 +70,12 @@ SRSLTE_API int srslte_tcod_encode(srslte_tcod_t *h,
|
|||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API int srslte_tcod_encode_lut(srslte_tcod_t *h,
|
||||
srslte_crc_t *crc,
|
||||
uint8_t *input,
|
||||
srslte_crc_t *crc_tb,
|
||||
srslte_crc_t *crc_cb,
|
||||
uint8_t *input,
|
||||
uint8_t *parity,
|
||||
uint32_t cblen_idx);
|
||||
uint32_t cblen_idx,
|
||||
bool last_cb);
|
||||
|
||||
SRSLTE_API void srslte_tcod_gentable();
|
||||
|
||||
|
|
|
@ -1,120 +1,143 @@
|
|||
/**
|
||||
*
|
||||
* \section COPYRIGHT
|
||||
*
|
||||
* Copyright 2013-2015 Software Radio Systems Limited
|
||||
*
|
||||
* \section LICENSE
|
||||
*
|
||||
* This file is part of the srsLTE library.
|
||||
*
|
||||
* srsLTE is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* srsLTE is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* A copy of the GNU Affero General Public License can be found in
|
||||
* the LICENSE file in the top-level directory of this distribution
|
||||
* and at http://www.gnu.org/licenses/.
|
||||
*
|
||||
*/
|
||||
|
||||
/**********************************************************************************************
|
||||
* File: turbodecoder.h
|
||||
*
|
||||
* Description: Turbo Decoder.
|
||||
* Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent
|
||||
* encoders and one turbo code internal interleaver. The coding rate of turbo
|
||||
* encoder is 1/3.
|
||||
* MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder.
|
||||
*
|
||||
* Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2
|
||||
*********************************************************************************************/
|
||||
|
||||
#ifndef SRSLTE_TURBODECODER_H
|
||||
#define SRSLTE_TURBODECODER_H
|
||||
|
||||
#include "srslte/config.h"
|
||||
#include "srslte/phy/fec/tc_interl.h"
|
||||
#include "srslte/phy/fec/cbsegm.h"
|
||||
|
||||
#define SRSLTE_TCOD_RATE 3
|
||||
#define SRSLTE_TCOD_TOTALTAIL 12
|
||||
|
||||
#define SRSLTE_TCOD_MAX_LEN_CB 6144
|
||||
#define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL)
|
||||
|
||||
#include "srslte/phy/fec/turbodecoder_gen.h"
|
||||
#include "srslte/phy/fec/turbodecoder_simd.h"
|
||||
|
||||
typedef struct SRSLTE_API {
|
||||
float *input_conv;
|
||||
union {
|
||||
srslte_tdec_simd_t tdec_simd;
|
||||
srslte_tdec_gen_t tdec_gen;
|
||||
};
|
||||
} srslte_tdec_t;
|
||||
|
||||
SRSLTE_API int srslte_tdec_init(srslte_tdec_t * h,
|
||||
uint32_t max_long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_free(srslte_tdec_t * h);
|
||||
|
||||
SRSLTE_API int srslte_tdec_reset(srslte_tdec_t * h,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API int srslte_tdec_reset_cb(srslte_tdec_t * h,
|
||||
uint32_t cb_idx);
|
||||
|
||||
SRSLTE_API int srslte_tdec_get_nof_iterations_cb(srslte_tdec_t * h,
|
||||
uint32_t cb_idx);
|
||||
|
||||
SRSLTE_API uint32_t srslte_tdec_get_nof_parallel(srslte_tdec_t * h);
|
||||
|
||||
SRSLTE_API void srslte_tdec_iteration(srslte_tdec_t * h,
|
||||
int16_t* input,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_decision(srslte_tdec_t * h,
|
||||
uint8_t *output,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_decision_byte(srslte_tdec_t * h,
|
||||
uint8_t *output,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API int srslte_tdec_run_all(srslte_tdec_t * h,
|
||||
int16_t * input,
|
||||
uint8_t *output,
|
||||
uint32_t nof_iterations,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_iteration_par(srslte_tdec_t * h,
|
||||
int16_t* input[SRSLTE_TDEC_MAX_NPAR],
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_decision_par(srslte_tdec_t * h,
|
||||
uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_decision_byte_par(srslte_tdec_t * h,
|
||||
uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_decision_byte_par_cb(srslte_tdec_t * h,
|
||||
uint8_t *output,
|
||||
uint32_t cb_idx,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API int srslte_tdec_run_all_par(srslte_tdec_t * h,
|
||||
int16_t * input[SRSLTE_TDEC_MAX_NPAR],
|
||||
uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
|
||||
uint32_t nof_iterations,
|
||||
uint32_t long_cb);
|
||||
|
||||
#endif // SRSLTE_TURBODECODER_H
|
||||
/**
|
||||
*
|
||||
* \section COPYRIGHT
|
||||
*
|
||||
* Copyright 2013-2015 Software Radio Systems Limited
|
||||
*
|
||||
* \section LICENSE
|
||||
*
|
||||
* This file is part of the srsLTE library.
|
||||
*
|
||||
* srsLTE is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* srsLTE is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* A copy of the GNU Affero General Public License can be found in
|
||||
* the LICENSE file in the top-level directory of this distribution
|
||||
* and at http://www.gnu.org/licenses/.
|
||||
*
|
||||
*/
|
||||
|
||||
/**********************************************************************************************
|
||||
* File: turbodecoder.h
|
||||
*
|
||||
* Description: Turbo Decoder.
|
||||
* Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent
|
||||
* encoders and one turbo code internal interleaver. The coding rate of turbo
|
||||
* encoder is 1/3.
|
||||
* MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder.
|
||||
*
|
||||
* Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2
|
||||
*********************************************************************************************/
|
||||
|
||||
#ifndef SRSLTE_TURBODECODER_H
|
||||
#define SRSLTE_TURBODECODER_H
|
||||
|
||||
#include "srslte/config.h"
|
||||
#include "srslte/phy/fec/tc_interl.h"
|
||||
#include "srslte/phy/fec/cbsegm.h"
|
||||
|
||||
#define SRSLTE_TCOD_RATE 3
|
||||
#define SRSLTE_TCOD_TOTALTAIL 12
|
||||
|
||||
#define SRSLTE_TCOD_MAX_LEN_CB 6144
|
||||
|
||||
// Expect the input to be aligned for sub-block window processing.
|
||||
#define SRSLTE_TDEC_EXPECT_INPUT_SB 1
|
||||
|
||||
// Include interfaces for 8 and 16 bit decoder implementations
|
||||
#define LLR_IS_8BIT
|
||||
#include "srslte/phy/fec/turbodecoder_impl.h"
|
||||
#undef LLR_IS_8BIT
|
||||
|
||||
#define LLR_IS_16BIT
|
||||
#include "srslte/phy/fec/turbodecoder_impl.h"
|
||||
#undef LLR_IS_16BIT
|
||||
|
||||
#define SRSLTE_TDEC_NOF_AUTO_MODES_8 2
|
||||
#define SRSLTE_TDEC_NOF_AUTO_MODES_16 3
|
||||
|
||||
typedef enum {SRSLTE_TDEC_8, SRSLTE_TDEC_16} srslte_tdec_llr_type_t;
|
||||
|
||||
typedef struct SRSLTE_API {
|
||||
uint32_t max_long_cb;
|
||||
|
||||
void *dec8_hdlr[SRSLTE_TDEC_NOF_AUTO_MODES_8];
|
||||
void *dec16_hdlr[SRSLTE_TDEC_NOF_AUTO_MODES_16];
|
||||
srslte_tdec_8bit_impl_t *dec8[SRSLTE_TDEC_NOF_AUTO_MODES_8];
|
||||
srslte_tdec_16bit_impl_t *dec16[SRSLTE_TDEC_NOF_AUTO_MODES_16];
|
||||
int nof_blocks8[SRSLTE_TDEC_NOF_AUTO_MODES_8];
|
||||
int nof_blocks16[SRSLTE_TDEC_NOF_AUTO_MODES_16];
|
||||
|
||||
// Declare as void types as can be int8 or int16
|
||||
void *app1;
|
||||
void *app2;
|
||||
void *ext1;
|
||||
void *ext2;
|
||||
void *syst0;
|
||||
void *parity0;
|
||||
void *parity1;
|
||||
|
||||
void *input_conv;
|
||||
|
||||
bool force_not_sb;
|
||||
|
||||
srslte_tdec_impl_type_t dec_type;
|
||||
|
||||
srslte_tdec_llr_type_t current_llr_type;
|
||||
uint32_t current_dec;
|
||||
uint32_t current_long_cb;
|
||||
uint32_t current_inter_idx;
|
||||
int current_cbidx;
|
||||
srslte_tc_interl_t interleaver[4][SRSLTE_NOF_TC_CB_SIZES];
|
||||
int n_iter;
|
||||
} srslte_tdec_t;
|
||||
|
||||
SRSLTE_API int srslte_tdec_init(srslte_tdec_t * h,
|
||||
uint32_t max_long_cb);
|
||||
|
||||
SRSLTE_API int srslte_tdec_init_manual(srslte_tdec_t * h,
|
||||
uint32_t max_long_cb,
|
||||
srslte_tdec_impl_type_t dec_type);
|
||||
|
||||
SRSLTE_API void srslte_tdec_free(srslte_tdec_t * h);
|
||||
|
||||
SRSLTE_API void srslte_tdec_force_not_sb(srslte_tdec_t *h);
|
||||
|
||||
SRSLTE_API int srslte_tdec_new_cb(srslte_tdec_t * h,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API int srslte_tdec_get_nof_iterations(srslte_tdec_t * h);
|
||||
|
||||
SRSLTE_API uint32_t srslte_tdec_autoimp_get_subblocks(uint32_t long_cb);
|
||||
|
||||
SRSLTE_API uint32_t srslte_tdec_autoimp_get_subblocks_8bit(uint32_t long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_iteration(srslte_tdec_t * h,
|
||||
int16_t* input,
|
||||
uint8_t *output);
|
||||
|
||||
SRSLTE_API int srslte_tdec_run_all(srslte_tdec_t * h,
|
||||
int16_t * input,
|
||||
uint8_t *output,
|
||||
uint32_t nof_iterations,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_iteration_8bit(srslte_tdec_t * h,
|
||||
int8_t* input,
|
||||
uint8_t *output);
|
||||
|
||||
SRSLTE_API int srslte_tdec_run_all_8bit(srslte_tdec_t * h,
|
||||
int8_t * input,
|
||||
uint8_t *output,
|
||||
uint32_t nof_iterations,
|
||||
uint32_t long_cb);
|
||||
|
||||
|
||||
#endif // SRSLTE_TURBODECODER_H
|
||||
|
|
|
@ -1,99 +1,62 @@
|
|||
/**
|
||||
*
|
||||
* \section COPYRIGHT
|
||||
*
|
||||
* Copyright 2013-2015 Software Radio Systems Limited
|
||||
*
|
||||
* \section LICENSE
|
||||
*
|
||||
* This file is part of the srsLTE library.
|
||||
*
|
||||
* srsLTE is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* srsLTE is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* A copy of the GNU Affero General Public License can be found in
|
||||
* the LICENSE file in the top-level directory of this distribution
|
||||
* and at http://www.gnu.org/licenses/.
|
||||
*
|
||||
*/
|
||||
|
||||
/**********************************************************************************************
|
||||
* File: turbodecoder.h
|
||||
*
|
||||
* Description: Turbo Decoder.
|
||||
* Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent
|
||||
* encoders and one turbo code internal interleaver. The coding rate of turbo
|
||||
* encoder is 1/3.
|
||||
* MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder.
|
||||
*
|
||||
* Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2
|
||||
*********************************************************************************************/
|
||||
|
||||
#ifndef SRSLTE_TURBODECODER_GEN_H
|
||||
#define SRSLTE_TURBODECODER_GEN_H
|
||||
|
||||
#include "srslte/config.h"
|
||||
#include "srslte/phy/fec/tc_interl.h"
|
||||
#include "srslte/phy/fec/cbsegm.h"
|
||||
|
||||
#define SRSLTE_TCOD_RATE 3
|
||||
#define SRSLTE_TCOD_TOTALTAIL 12
|
||||
|
||||
#define SRSLTE_TCOD_MAX_LEN_CB 6144
|
||||
#define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL)
|
||||
|
||||
typedef struct SRSLTE_API {
|
||||
int max_long_cb;
|
||||
float *beta;
|
||||
} srslte_map_gen_vl_t;
|
||||
|
||||
typedef struct SRSLTE_API {
|
||||
int max_long_cb;
|
||||
|
||||
srslte_map_gen_vl_t dec;
|
||||
|
||||
float *llr1;
|
||||
float *llr2;
|
||||
float *w;
|
||||
float *syst;
|
||||
float *parity;
|
||||
|
||||
int current_cbidx;
|
||||
uint32_t current_cb_len;
|
||||
uint32_t n_iter;
|
||||
srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES];
|
||||
} srslte_tdec_gen_t;
|
||||
|
||||
SRSLTE_API int srslte_tdec_gen_init(srslte_tdec_gen_t * h,
|
||||
uint32_t max_long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_gen_free(srslte_tdec_gen_t * h);
|
||||
|
||||
SRSLTE_API int srslte_tdec_gen_reset(srslte_tdec_gen_t * h, uint32_t long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_gen_iteration(srslte_tdec_gen_t * h,
|
||||
float * input,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_gen_decision(srslte_tdec_gen_t * h,
|
||||
uint8_t *output,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_gen_decision_byte(srslte_tdec_gen_t * h,
|
||||
uint8_t *output,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API int srslte_tdec_gen_run_all(srslte_tdec_gen_t * h,
|
||||
float * input,
|
||||
uint8_t *output,
|
||||
uint32_t nof_iterations,
|
||||
uint32_t long_cb);
|
||||
|
||||
#endif // SRSLTE_TURBODECODER_GEN_H
|
||||
/**
|
||||
*
|
||||
* \section COPYRIGHT
|
||||
*
|
||||
* Copyright 2013-2015 Software Radio Systems Limited
|
||||
*
|
||||
* \section LICENSE
|
||||
*
|
||||
* This file is part of the srsLTE library.
|
||||
*
|
||||
* srsLTE is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* srsLTE is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* A copy of the GNU Affero General Public License can be found in
|
||||
* the LICENSE file in the top-level directory of this distribution
|
||||
* and at http://www.gnu.org/licenses/.
|
||||
*
|
||||
*/
|
||||
|
||||
/**********************************************************************************************
|
||||
* File: turbodecoder.h
|
||||
*
|
||||
* Description: Turbo Decoder.
|
||||
* Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent
|
||||
* encoders and one turbo code internal interleaver. The coding rate of turbo
|
||||
* encoder is 1/3.
|
||||
* MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder.
|
||||
*
|
||||
* Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2
|
||||
*********************************************************************************************/
|
||||
|
||||
#ifndef SRSLTE_TURBODECODER_GEN_H
|
||||
#define SRSLTE_TURBODECODER_GEN_H
|
||||
|
||||
#include "srslte/config.h"
|
||||
#include "srslte/phy/fec/tc_interl.h"
|
||||
#include "srslte/phy/fec/cbsegm.h"
|
||||
|
||||
#define SRSLTE_TCOD_RATE 3
|
||||
#define SRSLTE_TCOD_TOTALTAIL 12
|
||||
|
||||
#define SRSLTE_TCOD_MAX_LEN_CB 6144
|
||||
|
||||
typedef struct SRSLTE_API {
|
||||
uint32_t max_long_cb;
|
||||
int16_t *beta;
|
||||
} tdec_gen_t;
|
||||
|
||||
int tdec_gen_init(void **h, uint32_t max_long_cb);
|
||||
void tdec_gen_free(void *h);
|
||||
void tdec_gen_dec(void *h, int16_t * input, int16_t *app, int16_t * parity, int16_t *output, uint32_t long_cb);
|
||||
void tdec_gen_extract_input(int16_t *input, int16_t *syst, int16_t *parity0, int16_t *parity1, int16_t *app2, uint32_t long_cb);
|
||||
void tdec_gen_decision_byte(int16_t *app1, uint8_t *output, uint32_t long_cb);
|
||||
|
||||
#endif // SRSLTE_TURBODECODER_GEN_H
|
||||
|
|
|
@ -0,0 +1,68 @@
|
|||
/**
|
||||
*
|
||||
* \section COPYRIGHT
|
||||
*
|
||||
* Copyright 2013-2015 Software Radio Systems Limited
|
||||
*
|
||||
* \section LICENSE
|
||||
*
|
||||
* This file is part of the srsLTE library.
|
||||
*
|
||||
* srsLTE is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* srsLTE is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* A copy of the GNU Affero General Public License can be found in
|
||||
* the LICENSE file in the top-level directory of this distribution
|
||||
* and at http://www.gnu.org/licenses/.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef SRSLTE_TURBODECODER_IMPL_H
|
||||
#define SRSLTE_TURBODECODER_IMPL_H
|
||||
|
||||
#include "srslte/config.h"
|
||||
|
||||
/* Interface for internal decoder implementation */
|
||||
typedef enum SRSLTE_API {
|
||||
SRSLTE_TDEC_AUTO = 0,
|
||||
SRSLTE_TDEC_GENERIC,
|
||||
SRSLTE_TDEC_SSE,
|
||||
SRSLTE_TDEC_SSE_WINDOW,
|
||||
SRSLTE_TDEC_AVX_WINDOW,
|
||||
SRSLTE_TDEC_SSE8_WINDOW,
|
||||
SRSLTE_TDEC_AVX8_WINDOW,
|
||||
SRSLTE_TDEC_NOF_IMP
|
||||
} srslte_tdec_impl_type_t;
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef LLR_IS_8BIT
|
||||
#define llr_t int8_t
|
||||
#define type_name srslte_tdec_8bit_impl_t
|
||||
#else
|
||||
#ifdef LLR_IS_16BIT
|
||||
#define llr_t int16_t
|
||||
#define type_name srslte_tdec_16bit_impl_t
|
||||
#else
|
||||
#error "Unsupported LLR mode"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
typedef struct SRSLTE_API {
|
||||
int (*tdec_init)(void **h, uint32_t max_long_cb);
|
||||
void (*tdec_free)(void *h);
|
||||
void (*tdec_dec)(void *h, llr_t * input, llr_t *app, llr_t * parity, llr_t *output, uint32_t long_cb);
|
||||
void (*tdec_extract_input)(llr_t *input, llr_t *syst, llr_t *parity0, llr_t *parity1, llr_t *app2, uint32_t long_cb);
|
||||
void (*tdec_decision_byte)(llr_t *app1, uint8_t *output, uint32_t long_cb);
|
||||
} type_name;
|
||||
|
||||
#undef llr_t
|
||||
#undef type_name
|
|
@ -0,0 +1,158 @@
|
|||
/**
|
||||
*
|
||||
* \section COPYRIGHT
|
||||
*
|
||||
* Copyright 2013-2015 Software Radio Systems Limited
|
||||
*
|
||||
* \section LICENSE
|
||||
*
|
||||
* This file is part of the srsLTE library.
|
||||
*
|
||||
* srsLTE is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* srsLTE is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* A copy of the GNU Affero General Public License can be found in
|
||||
* the LICENSE file in the top-level directory of this distribution
|
||||
* and at http://www.gnu.org/licenses/.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "srslte/config.h"
|
||||
|
||||
#define MAKE_CALL(a) CONCAT2(a,type_name)
|
||||
#define MAKE_VEC(a) CONCAT2(a,vec_suffix)
|
||||
#define PRINT CONCAT2(srslte_vec_fprint,print_suffix)
|
||||
|
||||
#ifdef LLR_IS_8BIT
|
||||
#define llr_t int8_t
|
||||
#define type_name _8bit
|
||||
#define vec_suffix _bbb
|
||||
#define print_suffix _bs
|
||||
#define decptr h->dec8[h->current_dec]
|
||||
#define dechdlr h->dec8_hdlr[h->current_dec]
|
||||
#define input_is_interleaved 1
|
||||
#else
|
||||
#ifdef LLR_IS_16BIT
|
||||
#define llr_t int16_t
|
||||
#define vec_suffix _sss
|
||||
#define print_suffix _s
|
||||
#define decptr h->dec16[h->current_dec]
|
||||
#define dechdlr h->dec16_hdlr[h->current_dec]
|
||||
#define input_is_interleaved (h->current_dec > 0)
|
||||
#define type_name _16bit
|
||||
#else
|
||||
#warning "Unsupported LLR mode"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define debug_enabled_iter 0
|
||||
#define debug_len 20
|
||||
|
||||
#define debug_vec(a) if (debug_enabled_iter) {printf("%s it=%d: ", STRING(a), n_iter);PRINT(stdout, a, debug_len);}
|
||||
|
||||
|
||||
static void MAKE_CALL(extract_input_tail_sb)(llr_t *input, llr_t *syst, llr_t *app2, llr_t *parity0, llr_t *parity1, uint32_t long_cb)
|
||||
{
|
||||
for (int i = long_cb; i < long_cb + 3; i++) {
|
||||
syst[i] = input[3*(long_cb+32) + 2*(i - long_cb)];
|
||||
parity0[i] = input[3*(long_cb+32)+ 2*(i - long_cb) + 1];
|
||||
|
||||
app2[i] = input[3*(long_cb+32) + 6 + 2*(i - long_cb)];
|
||||
parity1[i] = input[3*(long_cb+32) + 6 + 2*(i - long_cb) + 1];
|
||||
}
|
||||
}
|
||||
|
||||
/* Runs 1 turbo decoder iteration */
|
||||
void MAKE_CALL(run_tdec_iteration)(srslte_tdec_t * h, llr_t * input)
|
||||
{
|
||||
|
||||
if (h->current_cbidx >= 0) {
|
||||
uint16_t *inter = h->interleaver[h->current_inter_idx][h->current_cbidx].forward;
|
||||
uint16_t *deinter = h->interleaver[h->current_inter_idx][h->current_cbidx].reverse;
|
||||
llr_t *syst = (llr_t*) h->syst0;
|
||||
llr_t *parity0 = (llr_t*) h->parity0;
|
||||
llr_t *parity1 = (llr_t*) h->parity1;
|
||||
|
||||
llr_t *app1 = (llr_t*) h->app1;
|
||||
llr_t *app2 = (llr_t*) h->app2;
|
||||
llr_t *ext1 = (llr_t*) h->ext1;
|
||||
llr_t *ext2 = (llr_t*) h->ext2;
|
||||
|
||||
uint32_t long_cb = h->current_long_cb;
|
||||
uint32_t n_iter = h->n_iter;
|
||||
|
||||
if (SRSLTE_TDEC_EXPECT_INPUT_SB && !h->force_not_sb && input_is_interleaved) {
|
||||
syst = input;
|
||||
// align to 32 bytes (warning: must be same alignment as in rm_turbo.c)
|
||||
parity0 = &input[long_cb+32];
|
||||
parity1 = &input[2*(long_cb+32)];
|
||||
if (n_iter == 0) {
|
||||
MAKE_CALL(extract_input_tail_sb)(input, syst, app2, parity0, parity1, long_cb);
|
||||
}
|
||||
} else {
|
||||
if (n_iter == 0) {
|
||||
decptr->tdec_extract_input(input, syst, app2, parity0, parity1, long_cb);
|
||||
}
|
||||
}
|
||||
|
||||
if ((n_iter%2) == 0) {
|
||||
|
||||
// Add apriori information to decoder 1
|
||||
if (n_iter) {
|
||||
MAKE_VEC(srslte_vec_sub)(app1, ext1, app1, long_cb);
|
||||
}
|
||||
|
||||
// Run MAP DEC #1
|
||||
decptr->tdec_dec(dechdlr, syst, n_iter ? app1 : NULL, parity0, ext1, long_cb);
|
||||
|
||||
}
|
||||
// Interleave extrinsic output of DEC1 to form apriori info for decoder 2
|
||||
if (n_iter%2) {
|
||||
// Convert aposteriori information into extrinsic information
|
||||
if (n_iter > 1) {
|
||||
MAKE_VEC(srslte_vec_sub)(ext1, app1, ext1, long_cb);
|
||||
}
|
||||
|
||||
MAKE_VEC(srslte_vec_lut)(ext1, deinter, app2, long_cb);
|
||||
|
||||
// Run MAP DEC #2. 2nd decoder uses apriori information as systematic bits
|
||||
decptr->tdec_dec(dechdlr, app2, NULL, parity1, ext2, long_cb);
|
||||
|
||||
// Deinterleaved extrinsic bits become apriori info for decoder 1
|
||||
MAKE_VEC(srslte_vec_lut)(ext2, inter, app1, long_cb);
|
||||
|
||||
}
|
||||
|
||||
if (h->n_iter == 0) {
|
||||
debug_vec(syst);
|
||||
debug_vec(parity0);
|
||||
debug_vec(parity1);
|
||||
}
|
||||
debug_vec(ext1);
|
||||
debug_vec(ext2);
|
||||
debug_vec(app1);
|
||||
debug_vec(app2);
|
||||
|
||||
h->n_iter++;
|
||||
} else {
|
||||
fprintf(stderr, "Error CB index not set (call srslte_tdec_new_cb() first\n");
|
||||
}
|
||||
}
|
||||
|
||||
#undef debug_enabled
|
||||
#undef debug_len
|
||||
#undef debug_vec
|
||||
#undef llr_t
|
||||
#undef vec_suffix
|
||||
#undef print_suffix
|
||||
#undef decptr
|
||||
#undef dechdlr
|
||||
#undef type_name
|
||||
#undef input_is_interleaved
|
|
@ -1,122 +0,0 @@
|
|||
/**
|
||||
*
|
||||
* \section COPYRIGHT
|
||||
*
|
||||
* Copyright 2013-2015 Software Radio Systems Limited
|
||||
*
|
||||
* \section LICENSE
|
||||
*
|
||||
* This file is part of the srsLTE library.
|
||||
*
|
||||
* srsLTE is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* srsLTE is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* A copy of the GNU Affero General Public License can be found in
|
||||
* the LICENSE file in the top-level directory of this distribution
|
||||
* and at http://www.gnu.org/licenses/.
|
||||
*
|
||||
*/
|
||||
|
||||
/**********************************************************************************************
|
||||
* File: turbodecoder.h
|
||||
*
|
||||
* Description: Turbo Decoder.
|
||||
* Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent
|
||||
* encoders and one turbo code internal interleaver. The coding rate of turbo
|
||||
* encoder is 1/3.
|
||||
* MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder.
|
||||
*
|
||||
* Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2
|
||||
*********************************************************************************************/
|
||||
|
||||
#ifndef SRSLTE_TURBODECODER_SIMD_H
|
||||
#define SRSLTE_TURBODECODER_SIMD_H
|
||||
|
||||
#include "srslte/config.h"
|
||||
#include "srslte/phy/fec/tc_interl.h"
|
||||
#include "srslte/phy/fec/cbsegm.h"
|
||||
|
||||
// Define maximum number of CB decoded in parallel (2 for AVX2)
|
||||
#define SRSLTE_TDEC_MAX_NPAR 2
|
||||
|
||||
#define SRSLTE_TCOD_RATE 3
|
||||
#define SRSLTE_TCOD_TOTALTAIL 12
|
||||
|
||||
#define SRSLTE_TCOD_MAX_LEN_CB 6144
|
||||
#define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL)
|
||||
|
||||
typedef struct SRSLTE_API {
|
||||
uint32_t max_long_cb;
|
||||
uint32_t max_par_cb;
|
||||
int16_t *alpha;
|
||||
int16_t *branch;
|
||||
} map_gen_t;
|
||||
|
||||
typedef struct SRSLTE_API {
|
||||
uint32_t max_long_cb;
|
||||
uint32_t max_par_cb;
|
||||
|
||||
map_gen_t dec;
|
||||
|
||||
int16_t *app1[SRSLTE_TDEC_MAX_NPAR];
|
||||
int16_t *app2[SRSLTE_TDEC_MAX_NPAR];
|
||||
int16_t *ext1[SRSLTE_TDEC_MAX_NPAR];
|
||||
int16_t *ext2[SRSLTE_TDEC_MAX_NPAR];
|
||||
int16_t *syst[SRSLTE_TDEC_MAX_NPAR];
|
||||
int16_t *parity0[SRSLTE_TDEC_MAX_NPAR];
|
||||
int16_t *parity1[SRSLTE_TDEC_MAX_NPAR];
|
||||
|
||||
int cb_mask;
|
||||
int current_cbidx;
|
||||
srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES];
|
||||
int n_iter[SRSLTE_TDEC_MAX_NPAR];
|
||||
} srslte_tdec_simd_t;
|
||||
|
||||
SRSLTE_API int srslte_tdec_simd_init(srslte_tdec_simd_t * h,
|
||||
uint32_t max_par_cb,
|
||||
uint32_t max_long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_simd_free(srslte_tdec_simd_t * h);
|
||||
|
||||
SRSLTE_API int srslte_tdec_simd_reset(srslte_tdec_simd_t * h,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API
|
||||
|
||||
SRSLTE_API int srslte_tdec_simd_get_nof_iterations_cb(srslte_tdec_simd_t * h,
|
||||
uint32_t cb_idx);
|
||||
|
||||
SRSLTE_API int srslte_tdec_simd_reset_cb(srslte_tdec_simd_t * h,
|
||||
uint32_t cb_idx);
|
||||
|
||||
SRSLTE_API void srslte_tdec_simd_iteration(srslte_tdec_simd_t * h,
|
||||
int16_t * input[SRSLTE_TDEC_MAX_NPAR],
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_simd_decision(srslte_tdec_simd_t * h,
|
||||
uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_simd_decision_byte(srslte_tdec_simd_t * h,
|
||||
uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_simd_decision_byte_cb(srslte_tdec_simd_t * h,
|
||||
uint8_t *output,
|
||||
uint32_t cbidx,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API int srslte_tdec_simd_run_all(srslte_tdec_simd_t * h,
|
||||
int16_t * input[SRSLTE_TDEC_MAX_NPAR],
|
||||
uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
|
||||
uint32_t nof_iterations,
|
||||
uint32_t long_cb);
|
||||
|
||||
#endif // SRSLTE_TURBODECODER_SIMD_H
|
|
@ -1,119 +0,0 @@
|
|||
/**
|
||||
*
|
||||
* \section COPYRIGHT
|
||||
*
|
||||
* Copyright 2013-2015 Software Radio Systems Limited
|
||||
*
|
||||
* \section LICENSE
|
||||
*
|
||||
* This file is part of the srsLTE library.
|
||||
*
|
||||
* srsLTE is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* srsLTE is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* A copy of the GNU Affero General Public License can be found in
|
||||
* the LICENSE file in the top-level directory of this distribution
|
||||
* and at http://www.gnu.org/licenses/.
|
||||
*
|
||||
*/
|
||||
|
||||
/**********************************************************************************************
|
||||
* File: turbodecoder.h
|
||||
*
|
||||
* Description: Turbo Decoder.
|
||||
* Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent
|
||||
* encoders and one turbo code internal interleaver. The coding rate of turbo
|
||||
* encoder is 1/3.
|
||||
* MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder.
|
||||
*
|
||||
* Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2
|
||||
*********************************************************************************************/
|
||||
|
||||
#ifndef SRSLTE_TURBODECODER_SIMD_INTER_H
|
||||
#define SRSLTE_TURBODECODER_SIMD_INTER_H
|
||||
|
||||
|
||||
/** This is an simd inter-frame parallel turbo decoder. Parallizes 8 code-blocks using SSE
|
||||
* This implementation is currently not functional and not used by the rest of the code
|
||||
*/
|
||||
|
||||
#include "srslte/config.h"
|
||||
#include "srslte/phy/fec/tc_interl.h"
|
||||
#include "srslte/phy/fec/cbsegm.h"
|
||||
|
||||
#if LV_HAVE_AVX2
|
||||
#define SRSLTE_TDEC_MAX_NPAR 16
|
||||
#else
|
||||
#define SRSLTE_TDEC_MAX_NPAR 8
|
||||
#endif
|
||||
|
||||
|
||||
typedef struct SRSLTE_API {
|
||||
int max_long_cb;
|
||||
|
||||
int16_t *syst0;
|
||||
int16_t *parity0;
|
||||
int16_t *syst1;
|
||||
int16_t *parity1;
|
||||
int16_t *llr1;
|
||||
int16_t *llr2;
|
||||
int16_t *w;
|
||||
int16_t *alpha;
|
||||
|
||||
uint32_t max_par_cb;
|
||||
int current_cbidx;
|
||||
uint32_t current_long_cb;
|
||||
srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES];
|
||||
int n_iter[SRSLTE_TDEC_MAX_NPAR];
|
||||
} srslte_tdec_simd_inter_t;
|
||||
|
||||
SRSLTE_API int srslte_tdec_simd_inter_init(srslte_tdec_simd_inter_t * h,
|
||||
uint32_t max_par_cb,
|
||||
uint32_t max_long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_simd_inter_free(srslte_tdec_simd_inter_t * h);
|
||||
|
||||
SRSLTE_API int srslte_tdec_simd_inter_reset(srslte_tdec_simd_inter_t * h,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API int srslte_tdec_simd_inter_get_nof_iterations_cb(srslte_tdec_simd_inter_t * h,
|
||||
uint32_t cb_idx);
|
||||
|
||||
SRSLTE_API int srslte_tdec_simd_inter_reset_cb(srslte_tdec_simd_inter_t * h,
|
||||
uint32_t cb_idx);
|
||||
|
||||
SRSLTE_API void srslte_tdec_simd_inter_iteration(srslte_tdec_simd_inter_t * h,
|
||||
int16_t * input[SRSLTE_TDEC_MAX_NPAR],
|
||||
uint32_t nof_cb,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_simd_inter_decision(srslte_tdec_simd_inter_t * h,
|
||||
uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
|
||||
uint32_t nof_cb,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_simd_inter_decision_byte(srslte_tdec_simd_inter_t * h,
|
||||
uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
|
||||
uint32_t nof_cb,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_simd_inter_decision_byte_cb(srslte_tdec_simd_inter_t * h,
|
||||
uint8_t *output,
|
||||
uint32_t cbidx,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API int srslte_tdec_simd_inter_run_all(srslte_tdec_simd_inter_t * h,
|
||||
int16_t *input[SRSLTE_TDEC_MAX_NPAR],
|
||||
uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
|
||||
uint32_t nof_iterations,
|
||||
uint32_t nof_cb,
|
||||
uint32_t long_cb);
|
||||
|
||||
#endif // SRSLTE_TURBODECODER_SIMD_INTER_H
|
|
@ -1,101 +1,45 @@
|
|||
/**
|
||||
*
|
||||
* \section COPYRIGHT
|
||||
*
|
||||
* Copyright 2013-2015 Software Radio Systems Limited
|
||||
*
|
||||
* \section LICENSE
|
||||
*
|
||||
* This file is part of the srsLTE library.
|
||||
*
|
||||
* srsLTE is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* srsLTE is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* A copy of the GNU Affero General Public License can be found in
|
||||
* the LICENSE file in the top-level directory of this distribution
|
||||
* and at http://www.gnu.org/licenses/.
|
||||
*
|
||||
*/
|
||||
|
||||
/**********************************************************************************************
|
||||
* File: turbodecoder.h
|
||||
*
|
||||
* Description: Turbo Decoder.
|
||||
* Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent
|
||||
* encoders and one turbo code internal interleaver. The coding rate of turbo
|
||||
* encoder is 1/3.
|
||||
* MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder.
|
||||
*
|
||||
* Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2
|
||||
*********************************************************************************************/
|
||||
|
||||
#ifndef SRSLTE_TURBODECODER_SSE_
|
||||
#define SRSLTE_TURBODECODER_SSE_
|
||||
|
||||
#include "srslte/config.h"
|
||||
#include "srslte/phy/fec/tc_interl.h"
|
||||
#include "srslte/phy/fec/cbsegm.h"
|
||||
|
||||
#define SRSLTE_TCOD_RATE 3
|
||||
#define SRSLTE_TCOD_TOTALTAIL 12
|
||||
|
||||
#define SRSLTE_TCOD_MAX_LEN_CB 6144
|
||||
#define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL)
|
||||
|
||||
typedef struct SRSLTE_API {
|
||||
int max_long_cb;
|
||||
int16_t *alpha;
|
||||
int16_t *branch;
|
||||
} map_gen_t;
|
||||
|
||||
typedef struct SRSLTE_API {
|
||||
int max_long_cb;
|
||||
|
||||
map_gen_t dec;
|
||||
|
||||
int16_t *app1;
|
||||
int16_t *app2;
|
||||
int16_t *ext1;
|
||||
int16_t *ext2;
|
||||
int16_t *syst;
|
||||
int16_t *parity0;
|
||||
int16_t *parity1;
|
||||
|
||||
int current_cbidx;
|
||||
srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES];
|
||||
int n_iter;
|
||||
} srslte_tdec_sse_t;
|
||||
|
||||
SRSLTE_API int srslte_tdec_sse_init(srslte_tdec_sse_t * h,
|
||||
uint32_t max_long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_sse_free(srslte_tdec_sse_t * h);
|
||||
|
||||
SRSLTE_API int srslte_tdec_sse_reset(srslte_tdec_sse_t * h, uint32_t long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_sse_iteration(srslte_tdec_sse_t * h,
|
||||
int16_t * input,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_sse_decision(srslte_tdec_sse_t * h,
|
||||
uint8_t *output,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_sse_decision_byte(srslte_tdec_sse_t * h,
|
||||
uint8_t *output,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API int srslte_tdec_sse_run_all(srslte_tdec_sse_t * h,
|
||||
int16_t * input,
|
||||
uint8_t *output,
|
||||
uint32_t nof_iterations,
|
||||
uint32_t long_cb);
|
||||
|
||||
#endif // SRSLTE_TURBODECODER_SSE_
|
||||
/**
|
||||
*
|
||||
* \section COPYRIGHT
|
||||
*
|
||||
* Copyright 2013-2015 Software Radio Systems Limited
|
||||
*
|
||||
* \section LICENSE
|
||||
*
|
||||
* This file is part of the srsLTE library.
|
||||
*
|
||||
* srsLTE is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* srsLTE is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* A copy of the GNU Affero General Public License can be found in
|
||||
* the LICENSE file in the top-level directory of this distribution
|
||||
* and at http://www.gnu.org/licenses/.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef SRSLTE_TURBODECODER_SSE_H
|
||||
#define SRSLTE_TURBODECODER_SSE_H
|
||||
|
||||
#include "srslte/config.h"
|
||||
|
||||
typedef struct SRSLTE_API {
|
||||
uint32_t max_long_cb;
|
||||
int16_t *alpha;
|
||||
int16_t *branch;
|
||||
} tdec_sse_t;
|
||||
|
||||
int tdec_sse_init(void **h, uint32_t max_long_cb);
|
||||
void tdec_sse_free(void *h);
|
||||
void tdec_sse_dec(void *h, int16_t * input, int16_t *app, int16_t * parity,
|
||||
int16_t *output, uint32_t long_cb);
|
||||
void tdec_sse_extract_input(int16_t *input, int16_t *syst, int16_t *parity0, int16_t *parity1, int16_t *app2, uint32_t long_cb);
|
||||
void tdec_sse_decision_byte(int16_t *app1, uint8_t *output, uint32_t long_cb);
|
||||
|
||||
#endif // SRSLTE_TURBODECODER_SSE_H
|
||||
|
|
|
@ -0,0 +1,750 @@
|
|||
/**
|
||||
*
|
||||
* \section COPYRIGHT
|
||||
*
|
||||
* Copyright 2013-2015 Software Radio Systems Limited
|
||||
*
|
||||
* \section LICENSE
|
||||
*
|
||||
* This file is part of the srsLTE library.
|
||||
*
|
||||
* srsLTE is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* srsLTE is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* A copy of the GNU Affero General Public License can be found in
|
||||
* the LICENSE file in the top-level directory of this distribution
|
||||
* and at http://www.gnu.org/licenses/.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "srslte/config.h"
|
||||
|
||||
#define MAKE_FUNC(a) CONCAT2(CONCAT2(tdec_win,WINIMP),CONCAT2(_,a))
|
||||
#define MAKE_TYPE CONCAT2(CONCAT2(tdec_win_,WINIMP),_t)
|
||||
|
||||
#ifdef WINIMP_IS_SSE16
|
||||
|
||||
#ifndef LV_HAVE_SSE
|
||||
#error "Selected SSE window decoder but instruction set not supported"
|
||||
#endif
|
||||
|
||||
#include <nmmintrin.h>
|
||||
|
||||
#define WINIMP sse16
|
||||
#define nof_blocks 8
|
||||
|
||||
#define llr_t int16_t
|
||||
|
||||
#define simd_type_t __m128i
|
||||
#define simd_load _mm_load_si128
|
||||
#define simd_store _mm_store_si128
|
||||
#define simd_add _mm_adds_epi16
|
||||
#define simd_sub _mm_subs_epi16
|
||||
#define simd_max _mm_max_epi16
|
||||
#define simd_set1 _mm_set1_epi16
|
||||
#define simd_insert _mm_insert_epi16
|
||||
#define simd_shuffle _mm_shuffle_epi8
|
||||
#define move_right _mm_set_epi8(15,14,15,14,13,12,11,10,9,8,7,6,5,4,3,2)
|
||||
#define move_left _mm_set_epi8(13,12,11,10,9,8,7,6,5,4,3,2,1,0,1,0)
|
||||
#define simd_rb_shift _mm_srai_epi16
|
||||
|
||||
#define normalize_period 2
|
||||
#define win_overlap_len 40
|
||||
|
||||
#define divide_output 1
|
||||
|
||||
#define INF 10000
|
||||
|
||||
#else
|
||||
#ifdef WINIMP_IS_AVX16
|
||||
|
||||
#ifndef LV_HAVE_AVX
|
||||
#error "Selected AVX window decoder but instruction set not supported"
|
||||
#endif
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#define WINIMP avx16
|
||||
#define nof_blocks 16
|
||||
|
||||
#define llr_t int16_t
|
||||
|
||||
#define simd_type_t __m256i
|
||||
#define simd_load _mm256_load_si256
|
||||
#define simd_store _mm256_store_si256
|
||||
#define simd_add _mm256_adds_epi16
|
||||
#define simd_sub _mm256_subs_epi16
|
||||
#define simd_max _mm256_max_epi16
|
||||
#define simd_set1 _mm256_set1_epi16
|
||||
#define simd_insert _mm256_insert_epi16
|
||||
#define simd_shuffle _mm256_shuffle_epi8
|
||||
#define move_right _mm256_set_epi8(31,30,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2)
|
||||
#define move_left _mm256_set_epi8(29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,1,0)
|
||||
|
||||
#define normalize_period 2
|
||||
#define win_overlap_len 40
|
||||
|
||||
#define INF 10000
|
||||
#else
|
||||
|
||||
#ifdef WINIMP_IS_SSE8
|
||||
|
||||
#ifndef LV_HAVE_SSE
|
||||
#error "Selected SSE window decoder but instruction set not supported"
|
||||
#endif
|
||||
|
||||
#include <nmmintrin.h>
|
||||
|
||||
#define WINIMP sse8
|
||||
#define nof_blocks 16
|
||||
|
||||
#define llr_t int8_t
|
||||
|
||||
#define simd_type_t __m128i
|
||||
#define simd_load _mm_load_si128
|
||||
#define simd_store _mm_store_si128
|
||||
#define simd_add _mm_adds_epi8
|
||||
#define simd_sub _mm_subs_epi8
|
||||
#define simd_max _mm_max_epi8
|
||||
#define simd_set1 _mm_set1_epi8
|
||||
#define simd_insert _mm_insert_epi8
|
||||
#define simd_shuffle _mm_shuffle_epi8
|
||||
#define move_right _mm_set_epi8(15,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1)
|
||||
#define move_left _mm_set_epi8(14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,0)
|
||||
#define simd_rb_shift simd_rb_shift_128
|
||||
|
||||
#define normalize_max
|
||||
#define normalize_period 1
|
||||
#define win_overlap_len 40
|
||||
#define use_saturated_add
|
||||
#define divide_output 1
|
||||
|
||||
#define INF 0
|
||||
|
||||
inline static simd_type_t simd_rb_shift_128(simd_type_t v, const int l) {
|
||||
__m128i low = _mm_srai_epi16(_mm_slli_epi16(v,8), l+8);
|
||||
__m128i hi = _mm_srai_epi16(v,l);
|
||||
return _mm_blendv_epi8(hi, low, _mm_set1_epi32(0x00FF00FF));
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
|
||||
#ifdef WINIMP_IS_AVX8
|
||||
|
||||
#ifndef LV_HAVE_AVX
|
||||
#error "Selected AVX window decoder but instruction set not supported"
|
||||
#endif
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#define WINIMP avx8
|
||||
#define nof_blocks 32
|
||||
|
||||
#define llr_t int8_t
|
||||
|
||||
#define simd_type_t __m256i
|
||||
#define simd_load _mm256_load_si256
|
||||
#define simd_store _mm256_store_si256
|
||||
#define simd_add _mm256_adds_epi8
|
||||
#define simd_sub _mm256_subs_epi8
|
||||
#define simd_max _mm256_max_epi8
|
||||
#define simd_set1 _mm256_set1_epi8
|
||||
#define simd_insert _mm256_insert_epi8
|
||||
#define simd_shuffle _mm256_shuffle_epi8
|
||||
#define move_right _mm256_set_epi8(31,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1)
|
||||
#define move_left _mm256_set_epi8(30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,0)
|
||||
#define simd_rb_shift simd_rb_shift_256
|
||||
|
||||
#define INF 0
|
||||
|
||||
#define normalize_max
|
||||
#define normalize_period 1
|
||||
#define win_overlap_len 40
|
||||
#define use_saturated_add
|
||||
#define divide_output 1
|
||||
|
||||
inline static simd_type_t simd_rb_shift_256(simd_type_t v, const int l) {
|
||||
__m256i low = _mm256_srai_epi16(_mm256_slli_epi16(v,8), l+8);
|
||||
__m256i hi = _mm256_srai_epi16(v,l);
|
||||
return _mm256_blendv_epi8(hi, low, _mm256_set1_epi32(0x00FF00FF));
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
#error "Unknown WINIMP value"
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
typedef struct SRSLTE_API {
|
||||
uint32_t max_long_cb;
|
||||
llr_t *beta;
|
||||
} MAKE_TYPE;
|
||||
|
||||
|
||||
#define long_sb (long_cb/nof_blocks)
|
||||
|
||||
|
||||
|
||||
#define debug_enabled_win 0
|
||||
|
||||
#if debug_enabled_win
|
||||
#define debug_state(d) printf("k=%5d, in=%5d, pa=%3d, out=%5d, alpha=[", d*long_sb+k+1, MAKE_FUNC(get_simd)(x,d), MAKE_FUNC(get_simd)(y,d), MAKE_FUNC(get_simd)(out,d)); \
|
||||
for (int j=0;j<8;j++) printf("%5d, ", MAKE_FUNC(get_simd)(old[j],d)); \
|
||||
printf("], beta=["); \
|
||||
for (int j=0;j<8;j++) printf("%5d, ", MAKE_FUNC(get_simd)(beta_save[j], d));printf("\n");
|
||||
|
||||
#define debug_state_pre(d) printf("pre-window k=%5d, in=%5d, pa=%3d, alpha=[", (d+1)*long_sb-loop_len+k+1, MAKE_FUNC(get_simd)(x,d), MAKE_FUNC(get_simd)(y,d)); \
|
||||
for (int j=0;j<8;j++) printf("%5d, ", MAKE_FUNC(get_simd)(old[j],d)); \
|
||||
printf("]\n");
|
||||
|
||||
#define debug_state_beta(d) printf("k=%5d, in=%5d, pa=%3d, beta=[", d*long_sb+k, MAKE_FUNC(get_simd)(x,d), MAKE_FUNC(get_simd)(y,d)); \
|
||||
for (int j=0;j<8;j++) printf("%5d, ", MAKE_FUNC(get_simd)(old[j],d));\
|
||||
printf("\n");
|
||||
|
||||
static llr_t MAKE_FUNC(get_simd)(simd_type_t x, uint32_t pos) {
|
||||
llr_t *s = (llr_t*) &x;
|
||||
return s[pos];
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
#define debug_state(a)
|
||||
#define debug_state_pre(a)
|
||||
#define debug_state_beta(a)
|
||||
#endif
|
||||
/*
|
||||
static void MAKE_FUNC(print_simd)(simd_type_t x) {
|
||||
llr_t *s = (llr_t*) &x;
|
||||
printf("[");
|
||||
for (int i=0;i<nof_blocks;i++) {
|
||||
printf("%4d, ", s[i]);
|
||||
}
|
||||
printf("]\n");
|
||||
}*/
|
||||
|
||||
inline static llr_t MAKE_FUNC(sadd)(llr_t x, llr_t y) {
|
||||
#ifndef use_saturated_add
|
||||
return x+y;
|
||||
#else
|
||||
int16_t z = (int16_t) x+y;
|
||||
return z>127?127:(int8_t) z;
|
||||
#endif
|
||||
}
|
||||
|
||||
inline static void MAKE_FUNC(normalize)(uint32_t k, simd_type_t old[8]) {
|
||||
if ((k % normalize_period) == 0 && k != 0) {
|
||||
#ifdef normalize_max
|
||||
simd_type_t m = simd_max(old[0],old[1]);
|
||||
for (int i=2;i<8;i++) {
|
||||
m = simd_max(m,old[i]);
|
||||
}
|
||||
for (int i=0;i<8;i++) {
|
||||
old[i] = simd_sub(old[i], m);
|
||||
}
|
||||
#else
|
||||
for (int i = 1; i < 8; i++) {
|
||||
old[i] = simd_sub(old[i], old[0]);
|
||||
}
|
||||
old[0] = simd_set1(0);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
static void MAKE_FUNC(beta_trellis)(llr_t *input, llr_t *parity, uint32_t long_cb, llr_t old[8])
|
||||
{
|
||||
llr_t m_b[8], new[8];
|
||||
llr_t x, y, xy;
|
||||
|
||||
/* Calculate last state using Tail. No need to use SIMD here */
|
||||
old[0] = 0;
|
||||
for (int i = 1; i < 8; i++) {
|
||||
old[i] = -INF;
|
||||
}
|
||||
for (int k=long_cb+2;k >= long_cb; k--) {
|
||||
x = input[k];
|
||||
y = parity[k];
|
||||
|
||||
xy = MAKE_FUNC(sadd)(x, y);
|
||||
|
||||
m_b[0] = MAKE_FUNC(sadd)(old[4],xy);
|
||||
m_b[1] = old[4];
|
||||
m_b[2] = MAKE_FUNC(sadd)(old[5], y);
|
||||
m_b[3] = MAKE_FUNC(sadd)(old[5], x);
|
||||
m_b[4] = MAKE_FUNC(sadd)(old[6], x);
|
||||
m_b[5] = MAKE_FUNC(sadd)(old[6], y);
|
||||
m_b[6] = old[7];
|
||||
m_b[7] = MAKE_FUNC(sadd)(old[7], xy);
|
||||
|
||||
new[0] = old[0];
|
||||
new[1] = MAKE_FUNC(sadd)(old[0], xy);
|
||||
new[2] = MAKE_FUNC(sadd)(old[1], x);
|
||||
new[3] = MAKE_FUNC(sadd)(old[1], y);
|
||||
new[4] = MAKE_FUNC(sadd)(old[2], y);
|
||||
new[5] = MAKE_FUNC(sadd)(old[2], x);
|
||||
new[6] = MAKE_FUNC(sadd)(old[3], xy);
|
||||
new[7] = old[3];
|
||||
|
||||
#if debug_enabled_win
|
||||
printf("trellis: k=%d, in=%d, pa=%d, beta: ", k, x, y); for (int i=0;i<8;i++) {printf("%d,", old[i]);} printf("\n");
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < 8; i++) {
|
||||
if (m_b[i] > new[i])
|
||||
new[i] = m_b[i];
|
||||
old[i] = new[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Computes beta values */
|
||||
static void MAKE_FUNC(beta)(MAKE_TYPE * s, llr_t *input, llr_t *app, llr_t *parity, uint32_t long_cb)
|
||||
{
|
||||
simd_type_t m_b[8], new[8], old[8];
|
||||
simd_type_t x, y, xy, ap;
|
||||
|
||||
simd_type_t *inputPtr;
|
||||
simd_type_t *appPtr;
|
||||
simd_type_t *parityPtr;
|
||||
simd_type_t *betaPtr = (simd_type_t*) s->beta;
|
||||
|
||||
uint32_t loop_len;
|
||||
for (int j=0;j<2;j++) {
|
||||
|
||||
// First run L states to find initial state for all sub-blocks after first
|
||||
if (j==0) {
|
||||
loop_len = win_overlap_len;
|
||||
} else {
|
||||
loop_len = long_sb;
|
||||
}
|
||||
|
||||
// When passing through all window pick estimated initial states (known state for sb=0)
|
||||
if (loop_len == long_sb) {
|
||||
|
||||
// shuffle across 128-bit boundary manually
|
||||
#ifdef WINIMP_IS_AVX16
|
||||
llr_t tmp[8];
|
||||
for (int i = 0; i < 8; i++) {
|
||||
tmp[i] = _mm256_extract_epi16(old[i], 8);
|
||||
}
|
||||
#endif
|
||||
#ifdef WINIMP_IS_AVX8
|
||||
llr_t tmp[8];
|
||||
for (int i = 0; i < 8; i++) {
|
||||
tmp[i] = _mm256_extract_epi8(old[i], 16);
|
||||
}
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < 8; i++) {
|
||||
old[i] = simd_shuffle(old[i], move_right);
|
||||
}
|
||||
// last sub-block state is calculated from the trellis
|
||||
llr_t trellis_old[8];
|
||||
MAKE_FUNC(beta_trellis)(input, parity, long_cb, trellis_old);
|
||||
for (int i = 0; i < 8; i++) {
|
||||
old[i] = simd_insert(old[i], trellis_old[i], nof_blocks-1);
|
||||
}
|
||||
|
||||
#ifdef WINIMP_IS_AVX16
|
||||
for (int i = 0; i < 8; i++) {
|
||||
old[i] = _mm256_insert_epi16(old[i], tmp[i], 7);
|
||||
}
|
||||
#endif
|
||||
#ifdef WINIMP_IS_AVX8
|
||||
for (int i = 0; i < 8; i++) {
|
||||
old[i] = _mm256_insert_epi8(old[i], tmp[i], 15);
|
||||
}
|
||||
#endif
|
||||
|
||||
inputPtr = (simd_type_t*) &input[long_cb-nof_blocks];
|
||||
appPtr = (simd_type_t*) &app[long_cb-nof_blocks];
|
||||
parityPtr = (simd_type_t*) &parity[long_cb-nof_blocks];
|
||||
|
||||
for (int i = 0; i < 8; i++) {
|
||||
simd_store(&betaPtr[8*long_sb + i], old[i]);
|
||||
}
|
||||
|
||||
} else {
|
||||
// when estimating states, just set all to unknown
|
||||
for (int i = 0; i < 8; i++) {
|
||||
old[i] = simd_set1(-INF);
|
||||
}
|
||||
inputPtr = (simd_type_t*) &input[nof_blocks*(loop_len-1)];
|
||||
appPtr = (simd_type_t*) &app[nof_blocks*(loop_len-1)];
|
||||
parityPtr = (simd_type_t*) &parity[nof_blocks*(loop_len-1)];
|
||||
}
|
||||
|
||||
for (int k = loop_len - 1; k >= 0; k--) {
|
||||
x = simd_load(inputPtr--);
|
||||
y = simd_load(parityPtr--);
|
||||
|
||||
if (app) {
|
||||
ap = simd_load(appPtr--);
|
||||
x = simd_add(ap, x);
|
||||
}
|
||||
|
||||
xy = simd_add(x, y);
|
||||
|
||||
m_b[0] = simd_add(old[4], xy);
|
||||
m_b[1] = old[4];
|
||||
m_b[2] = simd_add(old[5], y);
|
||||
m_b[3] = simd_add(old[5], x);
|
||||
m_b[4] = simd_add(old[6], x);
|
||||
m_b[5] = simd_add(old[6], y);
|
||||
m_b[6] = old[7];
|
||||
m_b[7] = simd_add(old[7], xy);
|
||||
|
||||
new[0] = old[0];
|
||||
new[1] = simd_add(old[0], xy);
|
||||
new[2] = simd_add(old[1], x);
|
||||
new[3] = simd_add(old[1], y);
|
||||
new[4] = simd_add(old[2], y);
|
||||
new[5] = simd_add(old[2], x);
|
||||
new[6] = simd_add(old[3], xy);
|
||||
new[7] = old[3];
|
||||
|
||||
// Calculate maximum metric
|
||||
for (int i = 0; i < 8; i++) {
|
||||
old[i] = simd_max(m_b[i], new[i]);
|
||||
}
|
||||
// Store metric only when doing the final pass
|
||||
if (loop_len == long_sb) {
|
||||
for (int i = 0; i < 8; i++) {
|
||||
simd_store(&betaPtr[8*k + i], old[i]);
|
||||
}
|
||||
}
|
||||
if (loop_len!=long_sb) {
|
||||
debug_state_beta(0);
|
||||
} else {
|
||||
debug_state_beta(0);
|
||||
}
|
||||
|
||||
// normalize
|
||||
MAKE_FUNC(normalize)(k, old);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Computes alpha metrics */
|
||||
static void MAKE_FUNC(alpha)(MAKE_TYPE * s, llr_t *input, llr_t *app, llr_t *parity, llr_t * output, uint32_t long_cb)
|
||||
{
|
||||
simd_type_t m_b[8], new[8], old[8], max1[8], max0[8];
|
||||
simd_type_t x, y, xy, ap;
|
||||
simd_type_t m1, m0;
|
||||
|
||||
simd_type_t *inputPtr;
|
||||
simd_type_t *appPtr;
|
||||
simd_type_t *parityPtr;
|
||||
simd_type_t *betaPtr = (simd_type_t*) s->beta;
|
||||
simd_type_t *outputPtr = (simd_type_t*) output;
|
||||
|
||||
#if debug_enabled_win
|
||||
simd_type_t beta_save[8];
|
||||
#endif
|
||||
|
||||
// Skip state 0
|
||||
betaPtr+=8;
|
||||
|
||||
uint32_t loop_len;
|
||||
|
||||
for (int j=0;j<2;j++) {
|
||||
|
||||
// First run L states to find initial state for all sub-blocks after first
|
||||
if (j==0) {
|
||||
loop_len = win_overlap_len;
|
||||
} else {
|
||||
loop_len = long_sb;
|
||||
}
|
||||
|
||||
// When passing through all window pick estimated initial states (known state for sb=0)
|
||||
if (loop_len == long_sb) {
|
||||
|
||||
#ifdef WINIMP_IS_AVX16
|
||||
llr_t tmp[8];
|
||||
for (int i=0;i<8;i++) {
|
||||
tmp[i] = _mm256_extract_epi16(old[i], 7);
|
||||
}
|
||||
#endif
|
||||
#ifdef WINIMP_IS_AVX8
|
||||
llr_t tmp[8];
|
||||
for (int i=0;i<8;i++) {
|
||||
tmp[i] = _mm256_extract_epi8(old[i], 15);
|
||||
}
|
||||
#endif
|
||||
for (int i = 0; i < 8; i++) {
|
||||
old[i] = simd_shuffle(old[i], move_left);
|
||||
}
|
||||
#ifdef WINIMP_IS_AVX16
|
||||
for (int i=0;i<8;i++) {
|
||||
old[i] = _mm256_insert_epi16(old[i], tmp[i], 8);
|
||||
}
|
||||
#endif
|
||||
#ifdef WINIMP_IS_AVX8
|
||||
for (int i=0;i<8;i++) {
|
||||
old[i] = _mm256_insert_epi8(old[i], tmp[i], 16);
|
||||
}
|
||||
#endif
|
||||
// 1st sub-block state is known
|
||||
old[0] = simd_insert(old[0], 0, 0);
|
||||
for (int i = 1; i < 8; i++) {
|
||||
old[i] = simd_insert(old[i], -INF, 0);
|
||||
}
|
||||
} else {
|
||||
// when estimating states, just set all to unknown
|
||||
for (int i = 0; i < 8; i++) {
|
||||
old[i] = simd_set1(-INF);
|
||||
}
|
||||
}
|
||||
|
||||
inputPtr = (simd_type_t*) &input[nof_blocks*(long_sb-loop_len)];
|
||||
appPtr = (simd_type_t*) &app[nof_blocks*(long_sb-loop_len)];
|
||||
parityPtr = (simd_type_t*) &parity[nof_blocks*(long_sb-loop_len)];
|
||||
|
||||
for (int k = 0; k < loop_len; k++) {
|
||||
x = simd_load(inputPtr++);
|
||||
y = simd_load(parityPtr++);
|
||||
|
||||
if (app) {
|
||||
ap = simd_load(appPtr++);
|
||||
x = simd_add(ap, x);
|
||||
}
|
||||
|
||||
xy = simd_add(x,y);
|
||||
|
||||
m_b[0] = old[0];
|
||||
m_b[1] = simd_add(old[3], y);
|
||||
m_b[2] = simd_add(old[4], y);
|
||||
m_b[3] = old[7];
|
||||
m_b[4] = old[1];
|
||||
m_b[5] = simd_add(old[2], y);
|
||||
m_b[6] = simd_add(old[5], y);
|
||||
m_b[7] = old[6];
|
||||
|
||||
new[0] = simd_add(old[1], xy);
|
||||
new[1] = simd_add(old[2], x);
|
||||
new[2] = simd_add(old[5], x);
|
||||
new[3] = simd_add(old[6], xy);
|
||||
new[4] = simd_add(old[0], xy);
|
||||
new[5] = simd_add(old[3], x);
|
||||
new[6] = simd_add(old[4], x);
|
||||
new[7] = simd_add(old[7], xy);
|
||||
|
||||
// Load beta and compute output only when passing through all window
|
||||
if (loop_len == long_sb) {
|
||||
simd_type_t beta;
|
||||
for (int i = 0; i < 8; i++) {
|
||||
beta = simd_load(betaPtr++);
|
||||
max0[i] = simd_add(beta, m_b[i]);
|
||||
max1[i] = simd_add(beta, new[i]);
|
||||
|
||||
#if debug_enabled_win
|
||||
beta_save[i] = beta;
|
||||
#endif
|
||||
}
|
||||
|
||||
m1 = simd_max(max1[0], max1[1]);
|
||||
m0 = simd_max(max0[0], max0[1]);
|
||||
|
||||
for (int i = 2; i < 8; i++) {
|
||||
m1 = simd_max(m1, max1[i]);
|
||||
m0 = simd_max(m0, max0[i]);
|
||||
}
|
||||
|
||||
simd_type_t out = simd_sub(m1, m0);
|
||||
|
||||
// Divide output when using 8-bit arithmetic
|
||||
#ifdef divide_output
|
||||
out = simd_rb_shift(out, divide_output);
|
||||
#endif
|
||||
|
||||
simd_store(outputPtr++, out);
|
||||
|
||||
debug_state(0);
|
||||
}
|
||||
|
||||
for (int i = 0; i < 8; i++) {
|
||||
old[i] = simd_max(m_b[i], new[i]);
|
||||
}
|
||||
|
||||
// normalize
|
||||
MAKE_FUNC(normalize)(k, old);
|
||||
|
||||
if (loop_len != long_sb) {
|
||||
debug_state_pre(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int MAKE_FUNC(init)(void **hh, uint32_t max_long_cb)
|
||||
{
|
||||
*hh = calloc(1, sizeof(MAKE_TYPE));
|
||||
|
||||
MAKE_TYPE *h = (MAKE_TYPE*) *hh;
|
||||
|
||||
h->beta = srslte_vec_malloc(sizeof(llr_t) * 8 * max_long_cb * nof_blocks);
|
||||
if (!h->beta) {
|
||||
perror("srslte_vec_malloc");
|
||||
return -1;
|
||||
}
|
||||
h->max_long_cb = max_long_cb;
|
||||
return nof_blocks;
|
||||
}
|
||||
|
||||
void MAKE_FUNC(free)(void *hh)
|
||||
{
|
||||
MAKE_TYPE *h = (MAKE_TYPE*) hh;
|
||||
if (h->beta) {
|
||||
free(h->beta);
|
||||
}
|
||||
bzero(h, sizeof(MAKE_TYPE));
|
||||
}
|
||||
|
||||
void MAKE_FUNC(dec)(void *hh, llr_t *input, llr_t *app, llr_t *parity, llr_t *output, uint32_t long_cb)
|
||||
{
|
||||
MAKE_TYPE *h = (MAKE_TYPE*) hh;
|
||||
MAKE_FUNC(beta)(h, input, app, parity, long_cb);
|
||||
MAKE_FUNC(alpha)(h, input, app, parity, output, long_cb);
|
||||
#if debug_enabled_win
|
||||
printf("running win decoder: %s\n", STRING(WINIMP));
|
||||
#endif
|
||||
}
|
||||
|
||||
#define INSERT8_INPUT(reg, st, off) reg = simd_insert(reg, input[3*(i+(st+0)*long_sb)+off], st+0);\
|
||||
reg = simd_insert(reg, input[3*(i+(st+1)*long_sb)+off], st+1);\
|
||||
reg = simd_insert(reg, input[3*(i+(st+2)*long_sb)+off], st+2);\
|
||||
reg = simd_insert(reg, input[3*(i+(st+3)*long_sb)+off], st+3);\
|
||||
reg = simd_insert(reg, input[3*(i+(st+4)*long_sb)+off], st+4);\
|
||||
reg = simd_insert(reg, input[3*(i+(st+5)*long_sb)+off], st+5);\
|
||||
reg = simd_insert(reg, input[3*(i+(st+6)*long_sb)+off], st+6);\
|
||||
reg = simd_insert(reg, input[3*(i+(st+7)*long_sb)+off], st+7);
|
||||
|
||||
|
||||
void MAKE_FUNC(extract_input)(llr_t *input, llr_t *systematic, llr_t *app2, llr_t *parity_0, llr_t *parity_1, uint32_t long_cb)
|
||||
{
|
||||
simd_type_t *systPtr = (simd_type_t*) systematic;
|
||||
simd_type_t *parity0Ptr = (simd_type_t*) parity_0;
|
||||
simd_type_t *parity1Ptr = (simd_type_t*) parity_1;
|
||||
|
||||
simd_type_t syst, parity0, parity1;
|
||||
|
||||
for (int i=0;i<long_sb;i++) {
|
||||
INSERT8_INPUT(syst, 0, 0);
|
||||
INSERT8_INPUT(parity0, 0, 1);
|
||||
INSERT8_INPUT(parity1, 0, 2);
|
||||
|
||||
#if nof_blocks >= 16
|
||||
INSERT8_INPUT(syst, 8, 0);
|
||||
INSERT8_INPUT(parity0, 8, 1);
|
||||
INSERT8_INPUT(parity1, 8, 2);
|
||||
#endif
|
||||
|
||||
#if nof_blocks >= 32
|
||||
INSERT8_INPUT(syst, 16, 0);
|
||||
INSERT8_INPUT(parity0, 16, 1);
|
||||
INSERT8_INPUT(parity1, 16, 2);
|
||||
INSERT8_INPUT(syst, 24, 0);
|
||||
INSERT8_INPUT(parity0, 24, 1);
|
||||
INSERT8_INPUT(parity1, 24, 2);
|
||||
#endif
|
||||
|
||||
simd_store(systPtr++, syst);
|
||||
simd_store(parity0Ptr++, parity0);
|
||||
simd_store(parity1Ptr++, parity1);
|
||||
}
|
||||
|
||||
for (int i = long_cb; i < long_cb + 3; i++) {
|
||||
systematic[i] = input[3*long_cb + 2*(i - long_cb)];
|
||||
parity_0[i] = input[3*long_cb + 2*(i - long_cb) + 1];
|
||||
|
||||
app2[i] = input[3*long_cb + 6 + 2*(i - long_cb)];
|
||||
parity_1[i] = input[3*long_cb + 6 + 2*(i - long_cb) + 1];
|
||||
}
|
||||
}
|
||||
|
||||
#define deinter(x,win) ((x%(long_cb/win))*(win)+x/(long_cb/win))
|
||||
|
||||
#define reset_cnt(a,b) if(!((a+1)%b)) { \
|
||||
k+=b*nof_blocks; \
|
||||
if (k >= long_cb) { \
|
||||
k -= (long_cb-1);\
|
||||
}\
|
||||
}
|
||||
#define insert_bit(a,b) ap = _mm_insert_epi16(ap, app1[k+(a%b)*nof_blocks], 7-a); \
|
||||
reset_cnt(a,b); \
|
||||
|
||||
|
||||
#define decide_for(b) for (uint32_t i = 0; i < long_cb/8; i++) { \
|
||||
insert_bit(0,b);\
|
||||
insert_bit(1,b);\
|
||||
insert_bit(2,b);\
|
||||
insert_bit(3,b);\
|
||||
insert_bit(4,b);\
|
||||
insert_bit(5,b);\
|
||||
insert_bit(6,b);\
|
||||
insert_bit(7,b);\
|
||||
output[i] = (uint8_t) _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_packs_epi16(ap,zeros),zeros));\
|
||||
}
|
||||
|
||||
/* No improvement to use AVX here */
|
||||
void MAKE_FUNC(decision_byte)(llr_t *app1, uint8_t *output, uint32_t long_cb)
|
||||
{
|
||||
uint32_t k=0;
|
||||
__m128i zeros = _mm_setzero_si128();
|
||||
__m128i ap;
|
||||
|
||||
if ((long_cb%(nof_blocks*8)) == 0) {
|
||||
decide_for(8);
|
||||
} else if ((long_cb%(nof_blocks*4)) == 0) {
|
||||
decide_for(4);
|
||||
} else if ((long_cb%(nof_blocks*2)) == 0) {
|
||||
decide_for(2);
|
||||
} else {
|
||||
decide_for(1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#undef WINIMP
|
||||
#undef nof_blocks
|
||||
#undef llr_t
|
||||
#undef normalize_period
|
||||
#undef INF
|
||||
#undef win_overlap_len
|
||||
#undef simd_type_t
|
||||
#undef simd_load
|
||||
#undef simd_store
|
||||
#undef simd_add
|
||||
#undef simd_sub
|
||||
#undef simd_max
|
||||
#undef simd_set1
|
||||
#undef simd_insert
|
||||
#undef simd_shuffle
|
||||
#undef move_right
|
||||
#undef move_left
|
||||
#undef debug_enabled_win
|
||||
|
||||
#ifdef normalize_max
|
||||
#undef normalize_max
|
||||
#endif
|
||||
|
||||
#ifdef use_saturated_add
|
||||
#undef use_saturated_add
|
||||
#endif
|
||||
|
||||
#ifdef simd_rb_shift
|
||||
#undef simd_rb_shift
|
||||
#endif
|
||||
|
||||
#ifdef divide_output
|
||||
#undef divide_output
|
||||
#endif
|
|
@ -53,4 +53,9 @@ SRSLTE_API int srslte_demod_soft_demodulate_s(srslte_mod_t modulation,
|
|||
short* llr,
|
||||
int nsymbols);
|
||||
|
||||
SRSLTE_API int srslte_demod_soft_demodulate_b(srslte_mod_t modulation,
|
||||
const cf_t* symbols,
|
||||
int8_t* llr,
|
||||
int nsymbols);
|
||||
|
||||
#endif // SRSLTE_DEMOD_SOFT_H
|
||||
|
|
|
@ -65,6 +65,8 @@ typedef struct SRSLTE_API {
|
|||
uint16_t ue_rnti;
|
||||
bool is_ue;
|
||||
|
||||
bool llr_is_8bit;
|
||||
|
||||
/* Power allocation parameter 3GPP 36.213 Clause 5.2 Rho_b */
|
||||
float rho_a;
|
||||
|
||||
|
|
|
@ -74,6 +74,8 @@ typedef struct SRSLTE_API {
|
|||
uint16_t ue_rnti;
|
||||
uint32_t max_re;
|
||||
|
||||
bool llr_is_8bit;
|
||||
|
||||
srslte_dft_precoding_t dft_precoding;
|
||||
|
||||
/* buffers */
|
||||
|
|
|
@ -59,6 +59,8 @@ typedef struct SRSLTE_API {
|
|||
uint32_t max_iterations;
|
||||
uint32_t nof_iterations;
|
||||
|
||||
bool llr_is_8bit;
|
||||
|
||||
/* buffers */
|
||||
uint8_t *cb_in;
|
||||
uint8_t *parity_bits;
|
||||
|
|
|
@ -68,6 +68,11 @@ SRSLTE_API void srslte_scrambling_s_offset(srslte_sequence_t *s,
|
|||
int offset,
|
||||
int len);
|
||||
|
||||
SRSLTE_API void srslte_scrambling_sb_offset(srslte_sequence_t *s,
|
||||
int8_t *data,
|
||||
int offset,
|
||||
int len);
|
||||
|
||||
SRSLTE_API void srslte_scrambling_c(srslte_sequence_t *s,
|
||||
cf_t *data);
|
||||
|
||||
|
|
|
@ -100,6 +100,7 @@
|
|||
|
||||
#define SRSLTE_SIMD_I_SIZE 16
|
||||
|
||||
#define SRSLTE_SIMD_B_SIZE 64
|
||||
#define SRSLTE_SIMD_S_SIZE 32
|
||||
#define SRSLTE_SIMD_C16_SIZE 0
|
||||
|
||||
|
@ -111,6 +112,7 @@
|
|||
|
||||
#define SRSLTE_SIMD_I_SIZE 8
|
||||
|
||||
#define SRSLTE_SIMD_B_SIZE 32
|
||||
#define SRSLTE_SIMD_S_SIZE 16
|
||||
#define SRSLTE_SIMD_C16_SIZE 16
|
||||
|
||||
|
@ -122,6 +124,7 @@
|
|||
|
||||
#define SRSLTE_SIMD_I_SIZE 4
|
||||
|
||||
#define SRSLTE_SIMD_B_SIZE 16
|
||||
#define SRSLTE_SIMD_S_SIZE 8
|
||||
#define SRSLTE_SIMD_C16_SIZE 8
|
||||
|
||||
|
@ -132,7 +135,7 @@
|
|||
#define SRSLTE_SIMD_CF_SIZE 4
|
||||
|
||||
#define SRSLTE_SIMD_I_SIZE 4
|
||||
|
||||
#define SRSLTE_SIMD_B_SIZE 16
|
||||
#define SRSLTE_SIMD_S_SIZE 8
|
||||
#define SRSLTE_SIMD_C16_SIZE 8
|
||||
|
||||
|
@ -141,7 +144,7 @@
|
|||
#define SRSLTE_SIMD_CF_SIZE 0
|
||||
|
||||
#define SRSLTE_SIMD_I_SIZE 0
|
||||
|
||||
#define SRSLTE_SIMD_B_SIZE 0
|
||||
#define SRSLTE_SIMD_S_SIZE 0
|
||||
#define SRSLTE_SIMD_C16_SIZE 0
|
||||
|
||||
|
@ -1336,6 +1339,24 @@ static inline simd_s_t srslte_simd_s_mul(simd_s_t a, simd_s_t b) {
|
|||
#endif /* LV_HAVE_AVX512 */
|
||||
}
|
||||
|
||||
static inline simd_s_t srslte_simd_s_neg(simd_s_t a, simd_s_t b) {
|
||||
#ifdef LV_HAVE_AVX512
|
||||
#error sign instruction not available in avx512
|
||||
#else /* LV_HAVE_AVX512 */
|
||||
#ifdef LV_HAVE_AVX2
|
||||
return _mm256_sign_epi16(a, b);
|
||||
#else /* LV_HAVE_AVX2 */
|
||||
#ifdef LV_HAVE_SSE
|
||||
return _mm_sign_epi16(a, b);
|
||||
#else /* LV_HAVE_SSE */
|
||||
#ifdef HAVE_NEON
|
||||
#error sign instruction not available in Neon
|
||||
#endif /* HAVE_NEON */
|
||||
#endif /* LV_HAVE_SSE */
|
||||
#endif /* LV_HAVE_AVX2 */
|
||||
#endif /* LV_HAVE_AVX512 */
|
||||
}
|
||||
|
||||
static inline simd_s_t srslte_simd_s_add(simd_s_t a, simd_s_t b) {
|
||||
#ifdef LV_HAVE_AVX512
|
||||
return _mm512_add_epi16(a, b);
|
||||
|
@ -1681,7 +1702,7 @@ typedef int8x16_t simd_b_t;
|
|||
|
||||
|
||||
|
||||
static inline simd_b_t srslte_simd_b_load(int8_t *ptr){
|
||||
static inline simd_b_t srslte_simd_b_load(const int8_t *ptr){
|
||||
#ifdef LV_HAVE_AVX512
|
||||
return _mm512_load_si512(ptr);
|
||||
#else /* LV_HAVE_AVX512 */
|
||||
|
@ -1699,7 +1720,7 @@ static inline simd_b_t srslte_simd_b_load(int8_t *ptr){
|
|||
#endif /* LV_HAVE_AVX512 */
|
||||
}
|
||||
|
||||
static inline simd_b_t srslte_simd_b_loadu(int8_t *ptr){
|
||||
static inline simd_b_t srslte_simd_b_loadu(const int8_t *ptr){
|
||||
#ifdef LV_HAVE_AVX512
|
||||
return _mm512_loadu_si512(ptr);
|
||||
#else /* LV_HAVE_AVX512 */
|
||||
|
@ -1773,6 +1794,44 @@ static inline simd_b_t srslte_simd_b_xor(simd_b_t a, simd_b_t b) {
|
|||
#endif /* LV_HAVE_AVX512 */
|
||||
}
|
||||
|
||||
static inline simd_s_t srslte_simd_b_sub(simd_s_t a, simd_s_t b) {
|
||||
#ifdef LV_HAVE_AVX512
|
||||
return _mm512_subs_epi8(a, b);
|
||||
#else /* LV_HAVE_AVX512 */
|
||||
#ifdef LV_HAVE_AVX2
|
||||
return _mm256_subs_epi8(a, b);
|
||||
#else /* LV_HAVE_AVX2 */
|
||||
#ifdef LV_HAVE_SSE
|
||||
return _mm_subs_epi8(a, b);
|
||||
#else /* LV_HAVE_SSE */
|
||||
#ifdef HAVE_NEON
|
||||
return vsubqs_s8(a, b);
|
||||
#endif /* HAVE_NEON */
|
||||
#endif /* LV_HAVE_SSE */
|
||||
#endif /* LV_HAVE_AVX2 */
|
||||
#endif /* LV_HAVE_AVX512 */
|
||||
}
|
||||
|
||||
static inline simd_s_t srslte_simd_b_neg(simd_b_t a, simd_b_t b) {
|
||||
#ifdef LV_HAVE_AVX512
|
||||
#error sign instruction not available in avx512
|
||||
#else /* LV_HAVE_AVX512 */
|
||||
#ifdef LV_HAVE_AVX2
|
||||
return _mm256_sign_epi8(a, b);
|
||||
#else /* LV_HAVE_AVX2 */
|
||||
#ifdef LV_HAVE_SSE
|
||||
return _mm_sign_epi8(a, b);
|
||||
#else /* LV_HAVE_SSE */
|
||||
#ifdef HAVE_NEON
|
||||
#error sign instruction not available in Neon
|
||||
#endif /* HAVE_NEON */
|
||||
#endif /* LV_HAVE_SSE */
|
||||
#endif /* LV_HAVE_AVX2 */
|
||||
#endif /* LV_HAVE_AVX512 */
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif /*SRSLTE_SIMD_B_SIZE */
|
||||
|
||||
|
||||
|
|
|
@ -69,6 +69,7 @@ SRSLTE_API void *srslte_vec_realloc(void *ptr, uint32_t old_size, uint32_t new_s
|
|||
SRSLTE_API void srslte_vec_fprint_c(FILE *stream, cf_t *x, const uint32_t len);
|
||||
SRSLTE_API void srslte_vec_fprint_f(FILE *stream, float *x, const uint32_t len);
|
||||
SRSLTE_API void srslte_vec_fprint_b(FILE *stream, uint8_t *x, const uint32_t len);
|
||||
SRSLTE_API void srslte_vec_fprint_bs(FILE *stream, int8_t *x, const uint32_t len);
|
||||
SRSLTE_API void srslte_vec_fprint_byte(FILE *stream, uint8_t *x, const uint32_t len);
|
||||
SRSLTE_API void srslte_vec_fprint_i(FILE *stream, int *x, const uint32_t len);
|
||||
SRSLTE_API void srslte_vec_fprint_s(FILE *stream, short *x, const uint32_t len);
|
||||
|
@ -82,12 +83,13 @@ SRSLTE_API void srslte_vec_load_file(char *filename, void *buffer, const uint32_
|
|||
/* sum two vectors */
|
||||
SRSLTE_API void srslte_vec_sum_fff(const float *x, const float *y, float *z, const uint32_t len);
|
||||
SRSLTE_API void srslte_vec_sum_ccc(const cf_t *x, const cf_t *y, cf_t *z, const uint32_t len);
|
||||
SRSLTE_API void srslte_vec_sub_sss(const int16_t *x, const int16_t *y, int16_t *z, const uint32_t len);
|
||||
SRSLTE_API void srslte_vec_sum_sss(const int16_t *x, const int16_t *y, int16_t *z, const uint32_t len);
|
||||
|
||||
/* substract two vectors z=x-y */
|
||||
SRSLTE_API void srslte_vec_sub_fff(const float *x, const float *y, float *z, const uint32_t len);
|
||||
SRSLTE_API void srslte_vec_sub_ccc(const cf_t *x, const cf_t *y, cf_t *z, const uint32_t len);
|
||||
SRSLTE_API void srslte_vec_sub_sss(const int16_t *x, const int16_t *y, int16_t *z, const uint32_t len);
|
||||
SRSLTE_API void srslte_vec_sub_bbb(const int8_t *x, const int8_t *y, int8_t *z, const uint32_t len);
|
||||
|
||||
/* scalar product */
|
||||
SRSLTE_API void srslte_vec_sc_prod_cfc(const cf_t *x, const float h, cf_t *z, const uint32_t len);
|
||||
|
@ -97,8 +99,10 @@ SRSLTE_API void srslte_vec_sc_prod_fff(const float *x, const float h, float *z,
|
|||
|
||||
SRSLTE_API void srslte_vec_convert_fi(const float *x, const float scale, int16_t *z, const uint32_t len);
|
||||
SRSLTE_API void srslte_vec_convert_if(const int16_t *x, const float scale, float *z, const uint32_t len);
|
||||
SRSLTE_API void srslte_vec_convert_fb(const float *x, const float scale, int8_t *z, const uint32_t len);
|
||||
|
||||
SRSLTE_API void srslte_vec_lut_sss(const short *x, const unsigned short *lut, short *y, const uint32_t len);
|
||||
SRSLTE_API void srslte_vec_lut_bbb(const int8_t *x, const unsigned short *lut, int8_t *y, const uint32_t len);
|
||||
SRSLTE_API void srslte_vec_lut_sis(const short *x, const unsigned int *lut, short *y, const uint32_t len);
|
||||
|
||||
/* vector product (element-wise) */
|
||||
|
@ -115,6 +119,10 @@ SRSLTE_API void srslte_vec_prod_conj_ccc(const cf_t *x, const cf_t *y, cf_t *z,
|
|||
SRSLTE_API void srslte_vec_prod_fff(const float *x, const float *y, float *z, const uint32_t len);
|
||||
SRSLTE_API void srslte_vec_prod_sss(const int16_t *x, const int16_t *y, int16_t *z, const uint32_t len);
|
||||
|
||||
// Negate sign (scrambling)
|
||||
SRSLTE_API void srslte_vec_neg_sss(const int16_t *x, const int16_t *y, int16_t *z, const uint32_t len);
|
||||
SRSLTE_API void srslte_vec_neg_bbb(const int8_t *x, const int8_t *y, int8_t *z, const uint32_t len);
|
||||
|
||||
/* Dot-product */
|
||||
SRSLTE_API cf_t srslte_vec_dot_prod_cfc(const cf_t *x, const float *y, const uint32_t len);
|
||||
SRSLTE_API cf_t srslte_vec_dot_prod_ccc(const cf_t *x, const cf_t *y, const uint32_t len);
|
||||
|
|
|
@ -62,6 +62,8 @@ SRSLTE_API void srslte_vec_sum_sss_simd(const int16_t *x, const int16_t *y, int1
|
|||
|
||||
SRSLTE_API void srslte_vec_sub_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, int len);
|
||||
|
||||
SRSLTE_API void srslte_vec_sub_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, int len);
|
||||
|
||||
SRSLTE_API float srslte_vec_acc_ff_simd(const float *x, int len);
|
||||
|
||||
SRSLTE_API cf_t srslte_vec_acc_cc_simd(const cf_t *x, int len);
|
||||
|
@ -86,6 +88,10 @@ SRSLTE_API void srslte_vec_prod_ccc_c16_simd(const int16_t *a_re, const int16_t
|
|||
|
||||
SRSLTE_API void srslte_vec_prod_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, const int len);
|
||||
|
||||
SRSLTE_API void srslte_vec_neg_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, const int len);
|
||||
|
||||
SRSLTE_API void srslte_vec_neg_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const int len);
|
||||
|
||||
SRSLTE_API void srslte_vec_prod_cfc_simd(const cf_t *x, const float *y, cf_t *z, const int len);
|
||||
|
||||
SRSLTE_API void srslte_vec_prod_fff_simd(const float *x, const float *y, float *z, const int len);
|
||||
|
@ -120,10 +126,14 @@ SRSLTE_API void srslte_vec_abs_square_cf_simd(const cf_t *x, float *z, const int
|
|||
/* Other Functions */
|
||||
SRSLTE_API void srslte_vec_lut_sss_simd(const short *x, const unsigned short *lut, short *y, const int len);
|
||||
|
||||
SRSLTE_API void srslte_vec_lut_bbb_simd(const int8_t *x, const unsigned short *lut, int8_t *y, const int len);
|
||||
|
||||
SRSLTE_API void srslte_vec_convert_if_simd(const int16_t *x, float *z, const float scale, const int len);
|
||||
|
||||
SRSLTE_API void srslte_vec_convert_fi_simd(const float *x, int16_t *z, const float scale, const int len);
|
||||
|
||||
SRSLTE_API void srslte_vec_convert_fb_simd(const float *x, int8_t *z, const float scale, const int len);
|
||||
|
||||
SRSLTE_API void srslte_vec_cp_simd(const cf_t *src, cf_t *dst, int len);
|
||||
|
||||
SRSLTE_API void srslte_vec_interleave_simd(const cf_t *x, const cf_t *y, cf_t *z, const int len);
|
||||
|
|
|
@ -53,8 +53,9 @@ bool threads_new_rt_cpu(pthread_t *thread, void *(*start_routine) (void*), void
|
|||
cpu_set_t cpuset;
|
||||
bool attr_enable = false;
|
||||
|
||||
#ifdef PER_THREAD_PRIO
|
||||
if (prio_offset >= 0) {
|
||||
param.sched_priority = sched_get_priority_max(SCHED_FIFO) - prio_offset;
|
||||
param.sched_priority = sched_get_priority_max(SCHED_FIFO) - prio_offset;
|
||||
pthread_attr_init(&attr);
|
||||
if (pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED)) {
|
||||
perror("pthread_attr_setinheritsched");
|
||||
|
@ -82,6 +83,25 @@ bool threads_new_rt_cpu(pthread_t *thread, void *(*start_routine) (void*), void
|
|||
}
|
||||
attr_enable = true;
|
||||
} else if (prio_offset == -2) {
|
||||
#else
|
||||
// All threads have normal priority except prio_offset=0,1,2,3,4
|
||||
if (prio_offset >= 0 && prio_offset < 5) {
|
||||
param.sched_priority = 50;
|
||||
pthread_attr_init(&attr);
|
||||
if (pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED)) {
|
||||
perror("pthread_attr_setinheritsched");
|
||||
}
|
||||
if (pthread_attr_setschedpolicy(&attr, SCHED_FIFO)) {
|
||||
perror("pthread_attr_setschedpolicy");
|
||||
}
|
||||
if (pthread_attr_setschedparam(&attr, ¶m)) {
|
||||
perror("pthread_attr_setschedparam");
|
||||
fprintf(stderr, "Error not enough privileges to set Scheduling priority\n");
|
||||
}
|
||||
attr_enable = true;
|
||||
|
||||
} else {
|
||||
#endif
|
||||
param.sched_priority = 0;
|
||||
pthread_attr_init(&attr);
|
||||
if (pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED)) {
|
||||
|
|
|
@ -139,6 +139,7 @@ int srslte_sequence_LTE_pr(srslte_sequence_t *q, uint32_t len, uint32_t seed) {
|
|||
for (int i=0;i<len;i++) {
|
||||
q->c_float[i] = (1-2*q->c[i]);
|
||||
q->c_short[i] = (int16_t) q->c_float[i];
|
||||
q->c_char[i] = (int8_t) q->c_float[i];;
|
||||
}
|
||||
return SRSLTE_SUCCESS;
|
||||
}
|
||||
|
@ -164,6 +165,10 @@ int srslte_sequence_init(srslte_sequence_t *q, uint32_t len) {
|
|||
if (!q->c_short) {
|
||||
return SRSLTE_ERROR;
|
||||
}
|
||||
q->c_char = srslte_vec_malloc(len * sizeof(int8_t));
|
||||
if (!q->c_char) {
|
||||
return SRSLTE_ERROR;
|
||||
}
|
||||
q->max_len = len;
|
||||
}
|
||||
return SRSLTE_SUCCESS;
|
||||
|
@ -182,6 +187,9 @@ void srslte_sequence_free(srslte_sequence_t *q) {
|
|||
if (q->c_short) {
|
||||
free(q->c_short);
|
||||
}
|
||||
if (q->c_char) {
|
||||
free(q->c_char);
|
||||
}
|
||||
bzero(q, sizeof(srslte_sequence_t));
|
||||
}
|
||||
|
||||
|
|
|
@ -43,13 +43,15 @@
|
|||
#endif
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <smmintrin.h>
|
||||
int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx);
|
||||
#include <x86intrin.h>
|
||||
int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint16_t *deinter, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx);
|
||||
int srslte_rm_turbo_rx_lut_sse_8bit(int8_t *input, int8_t *output, uint16_t *deinter, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx);
|
||||
#endif
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
#include <immintrin.h>
|
||||
int srslte_rm_turbo_rx_lut_avx(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx);
|
||||
#include <x86intrin.h>
|
||||
int srslte_rm_turbo_rx_lut_avx(int16_t *input, int16_t *output, uint16_t *deinter, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx);
|
||||
int srslte_rm_turbo_rx_lut_avx_8bit(int8_t *input, int8_t *output, uint16_t *deinter, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx);
|
||||
#endif
|
||||
|
||||
#define NCOLS 32
|
||||
|
@ -66,12 +68,31 @@ static uint16_t interleaver_parity_bits[192][2*6160];
|
|||
static srslte_bit_interleaver_t bit_interleavers_parity_bits[192];
|
||||
static uint16_t deinterleaver[192][4][18448];
|
||||
static int k0_vec[SRSLTE_NOF_TC_CB_SIZES][4][2];
|
||||
static bool rm_turbo_tables_generated = false;
|
||||
static bool rm_turbo_tables_generated = false;
|
||||
|
||||
|
||||
// Store deinterleaver version for sub-block turbo decoder
|
||||
#if SRSLTE_TDEC_EXPECT_INPUT_SB == 1
|
||||
// Prepare bit for sub-block decoder processing. These are the nof subblock sizes
|
||||
#define NOF_DEINTER_TABLE_SB_IDX 3
|
||||
const static int deinter_table_sb_idx[NOF_DEINTER_TABLE_SB_IDX] = {8, 16, 32};
|
||||
int deinter_table_idx_from_sb_len(uint32_t nof_subblocks) {
|
||||
for (int i=0;i<NOF_DEINTER_TABLE_SB_IDX;i++) {
|
||||
if (deinter_table_sb_idx[i] == nof_subblocks) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
if (nof_subblocks != 0) {
|
||||
fprintf(stderr, "Error number of sub-blocks %d not supported in RM\n", nof_subblocks);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
static uint16_t deinterleaver_sb[NOF_DEINTER_TABLE_SB_IDX][192][4][18448];
|
||||
#endif
|
||||
|
||||
static uint16_t temp_table1[3*6176], temp_table2[3*6176];
|
||||
|
||||
void srslte_rm_turbo_gentable_systematic(uint16_t *table_bits, int k0_vec[4][2], uint32_t nrows, int ndummy) {
|
||||
static void srslte_rm_turbo_gentable_systematic(uint16_t *table_bits, int k0_vec[4][2], uint32_t nrows, int ndummy) {
|
||||
|
||||
bool last_is_null=true;
|
||||
int k_b=0, buff_idx=0;
|
||||
|
@ -96,7 +117,7 @@ void srslte_rm_turbo_gentable_systematic(uint16_t *table_bits, int k0_vec[4][2],
|
|||
}
|
||||
}
|
||||
|
||||
void srslte_rm_turbo_gentable_parity(uint16_t *table_parity, int k0_vec[4][2], int offset, uint16_t nrows, int ndummy) {
|
||||
static void srslte_rm_turbo_gentable_parity(uint16_t *table_parity, int k0_vec[4][2], int offset, uint16_t nrows, int ndummy) {
|
||||
|
||||
bool last_is_null=true;
|
||||
int k_b=0, buff_idx0=0;
|
||||
|
@ -140,9 +161,7 @@ void srslte_rm_turbo_gentable_parity(uint16_t *table_parity, int k0_vec[4][2], i
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void srslte_rm_turbo_gentable_receive(uint16_t *table, uint32_t cb_len, uint32_t rv_idx)
|
||||
static void srslte_rm_turbo_gentable_receive(uint16_t *table, uint32_t cb_len, uint32_t rv_idx)
|
||||
{
|
||||
|
||||
int nrows = (uint32_t) (cb_len / 3 - 1) / NCOLS + 1;
|
||||
|
@ -217,14 +236,41 @@ void srslte_rm_turbo_gentable_receive(uint16_t *table, uint32_t cb_len, uint32_t
|
|||
table[i] = temp_table2[temp_table1[i]];
|
||||
}
|
||||
}
|
||||
#if SRSLTE_TDEC_EXPECT_INPUT_SB==1
|
||||
#define inter(x,win) ((x%(long_cb/win))*(win)+x/(long_cb/win))
|
||||
|
||||
/* Prepare output for sliding window decoder:
|
||||
* (0..long_cb-1) bits are systematic
|
||||
* (long_cb..2*long_cb-1) are parity0
|
||||
* (2*long_cb..3*long_cb-1) are parity1
|
||||
* then tail bits
|
||||
*
|
||||
* Within each block, bits are interleaved every nof_sb
|
||||
*/
|
||||
static void interleave_table_sb(uint16_t *in, uint16_t *out, uint32_t cb_idx, uint32_t nof_sb)
|
||||
{
|
||||
int long_cb = srslte_cbsegm_cbsize(cb_idx);
|
||||
int out_len = 3*long_cb+12;
|
||||
for (int i=0;i<out_len;i++) {
|
||||
// Do not change tail bit order
|
||||
if (in[i] < 3*long_cb) {
|
||||
|
||||
// align to 32 bytes (warning: must be same alignment as in rm_turbo.c)
|
||||
out[i] = (in[i]%3)*(long_cb+32)+inter(in[i]/3,nof_sb);
|
||||
} else {
|
||||
out[i] = (in[i]-3*long_cb)+3*(long_cb+32);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void srslte_rm_turbo_gentables() {
|
||||
if (!rm_turbo_tables_generated) {
|
||||
rm_turbo_tables_generated = true;
|
||||
for (int cb_idx=0;cb_idx<SRSLTE_NOF_TC_CB_SIZES;cb_idx++) {
|
||||
int cb_len=srslte_cbsegm_cbsize(cb_idx);
|
||||
int in_len=3*cb_len+12;
|
||||
|
||||
int cb_len = srslte_cbsegm_cbsize(cb_idx);
|
||||
int in_len = 3 * cb_len + 12;
|
||||
|
||||
int nrows = (in_len / 3 - 1) / NCOLS + 1;
|
||||
int K_p = nrows * NCOLS;
|
||||
int ndummy = K_p - in_len / 3;
|
||||
|
@ -232,23 +278,30 @@ void srslte_rm_turbo_gentables() {
|
|||
ndummy = 0;
|
||||
}
|
||||
|
||||
for (int i=0;i<4;i++) {
|
||||
k0_vec[cb_idx][i][0] = nrows * (2 * (uint16_t) ceilf((float) (3*K_p) / (float) (8 * nrows)) * i + 2);
|
||||
k0_vec[cb_idx][i][1] = -1;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
k0_vec[cb_idx][i][0] = nrows * (2 * (uint16_t) ceilf((float) (3 * K_p) / (float) (8 * nrows)) * i + 2);
|
||||
k0_vec[cb_idx][i][1] = -1;
|
||||
}
|
||||
srslte_rm_turbo_gentable_systematic(interleaver_systematic_bits[cb_idx], k0_vec[cb_idx], nrows, ndummy);
|
||||
srslte_bit_interleaver_init(&bit_interleavers_systematic_bits[cb_idx], interleaver_systematic_bits[cb_idx],
|
||||
(uint32_t) srslte_cbsegm_cbsize(cb_idx)+4);
|
||||
(uint32_t) srslte_cbsegm_cbsize(cb_idx) + 4);
|
||||
|
||||
srslte_rm_turbo_gentable_parity(interleaver_parity_bits[cb_idx], k0_vec[cb_idx], in_len/3, nrows, ndummy);
|
||||
srslte_rm_turbo_gentable_parity(interleaver_parity_bits[cb_idx], k0_vec[cb_idx], in_len / 3, nrows, ndummy);
|
||||
srslte_bit_interleaver_init(&bit_interleavers_parity_bits[cb_idx], interleaver_parity_bits[cb_idx],
|
||||
(uint32_t) (srslte_cbsegm_cbsize(cb_idx)+4)*2);
|
||||
(uint32_t) (srslte_cbsegm_cbsize(cb_idx) + 4) * 2);
|
||||
|
||||
for (int i=0;i<4;i++) {
|
||||
srslte_rm_turbo_gentable_receive(deinterleaver[cb_idx][i], in_len, i);
|
||||
}
|
||||
for (int i = 0; i < 4; i++) {
|
||||
srslte_rm_turbo_gentable_receive(deinterleaver[cb_idx][i], in_len, i);
|
||||
|
||||
#if SRSLTE_TDEC_EXPECT_INPUT_SB == 1
|
||||
for (uint32_t s = 0; s < NOF_DEINTER_TABLE_SB_IDX; s++) {
|
||||
interleave_table_sb(deinterleaver[cb_idx][i], deinterleaver_sb[s][cb_idx][i], cb_idx,
|
||||
deinter_table_sb_idx[s]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void srslte_rm_turbo_free_tables () {
|
||||
|
@ -322,6 +375,10 @@ int srslte_rm_turbo_tx_lut(uint8_t *w_buff, uint8_t *systematic, uint8_t *parity
|
|||
}
|
||||
}
|
||||
|
||||
int srslte_rm_turbo_rx_lut(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
|
||||
{
|
||||
return srslte_rm_turbo_rx_lut_(input, output, in_len, cb_idx, rv_idx, true);
|
||||
}
|
||||
/**
|
||||
* Undoes rate matching for LTE Turbo Coder. Expands rate matched buffer to full size buffer.
|
||||
*
|
||||
|
@ -331,38 +388,94 @@ int srslte_rm_turbo_tx_lut(uint8_t *w_buff, uint8_t *systematic, uint8_t *parity
|
|||
* @param[in] rv_idx Redundancy Version from DCI control message
|
||||
* @return Error code
|
||||
*/
|
||||
int srslte_rm_turbo_rx_lut(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
|
||||
{
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
return srslte_rm_turbo_rx_lut_avx(input, output, in_len, cb_idx, rv_idx);
|
||||
#else
|
||||
#ifdef LV_HAVE_SSE
|
||||
return srslte_rm_turbo_rx_lut_sse(input, output, in_len, cb_idx, rv_idx);
|
||||
int srslte_rm_turbo_rx_lut_(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx, bool enable_input_tdec)
|
||||
{
|
||||
|
||||
if (rv_idx < 4 && cb_idx < SRSLTE_NOF_TC_CB_SIZES) {
|
||||
|
||||
#if SRSLTE_TDEC_EXPECT_INPUT_SB == 1
|
||||
int cb_len=srslte_cbsegm_cbsize(cb_idx);
|
||||
int idx = deinter_table_idx_from_sb_len(srslte_tdec_autoimp_get_subblocks(cb_len));
|
||||
uint16_t *deinter = NULL;
|
||||
if (idx < 0 || !enable_input_tdec) {
|
||||
deinter = deinterleaver[cb_idx][rv_idx];
|
||||
} else if (idx < NOF_DEINTER_TABLE_SB_IDX) {
|
||||
deinter = deinterleaver_sb[idx][cb_idx][rv_idx];
|
||||
} else {
|
||||
fprintf(stderr, "Sub-block size index %d not supported in srslte_rm_turbo_rx_lut()\n", idx);
|
||||
return -1;
|
||||
}
|
||||
#else
|
||||
uint16_t *deinter = deinterleaver[cb_idx][rv_idx];
|
||||
#endif
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
return srslte_rm_turbo_rx_lut_avx(input, output, deinter, in_len, cb_idx, rv_idx);
|
||||
#else
|
||||
#ifdef LV_HAVE_SSE
|
||||
return srslte_rm_turbo_rx_lut_sse(input, output, deinter, in_len, cb_idx, rv_idx);
|
||||
#else
|
||||
if (rv_idx < 4 && cb_idx < SRSLTE_NOF_TC_CB_SIZES) {
|
||||
uint32_t out_len = 3*srslte_cbsegm_cbsize(cb_idx)+12;
|
||||
uint16_t *deinter = deinterleaver[cb_idx][rv_idx];
|
||||
for (int i=0;i<in_len;i++) {
|
||||
|
||||
for (int i=0;i<in_len;i++) {
|
||||
output[deinter[i%out_len]] += input[i];
|
||||
}
|
||||
return 0;
|
||||
} else {
|
||||
printf("Invalid inputs rv_idx=%d, cb_idx=%d\n", rv_idx, cb_idx);
|
||||
return SRSLTE_ERROR_INVALID_INPUTS;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
} else {
|
||||
printf("Invalid inputs rv_idx=%d, cb_idx=%d\n", rv_idx, cb_idx);
|
||||
return SRSLTE_ERROR_INVALID_INPUTS;
|
||||
}
|
||||
}
|
||||
|
||||
int srslte_rm_turbo_rx_lut_8bit(int8_t *input, int8_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
|
||||
{
|
||||
if (rv_idx < 4 && cb_idx < SRSLTE_NOF_TC_CB_SIZES) {
|
||||
|
||||
#if SRSLTE_TDEC_EXPECT_INPUT_SB == 1
|
||||
int cb_len=srslte_cbsegm_cbsize(cb_idx);
|
||||
int idx = deinter_table_idx_from_sb_len(srslte_tdec_autoimp_get_subblocks_8bit(cb_len));
|
||||
uint16_t *deinter = NULL;
|
||||
if (idx < 0) {
|
||||
deinter = deinterleaver[cb_idx][rv_idx];
|
||||
} else if (idx < NOF_DEINTER_TABLE_SB_IDX) {
|
||||
deinter = deinterleaver_sb[idx][cb_idx][rv_idx];
|
||||
} else {
|
||||
fprintf(stderr, "Sub-block size index %d not supported in srslte_rm_turbo_rx_lut()\n", idx);
|
||||
return -1;
|
||||
}
|
||||
#else
|
||||
uint16_t *deinter = deinterleaver[cb_idx][rv_idx];
|
||||
#endif
|
||||
|
||||
// FIXME: AVX version of rm_turbo_rx_lut not working
|
||||
// Warning: Need to check if 8-bit sse version is correct
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
return srslte_rm_turbo_rx_lut_sse_8bit(input, output, deinter, in_len, cb_idx, rv_idx);
|
||||
#else
|
||||
uint32_t out_len = 3*srslte_cbsegm_cbsize(cb_idx)+12;
|
||||
|
||||
for (int i=0;i<in_len;i++) {
|
||||
output[deinter[i%out_len]] += input[i];
|
||||
}
|
||||
return 0;
|
||||
#endif
|
||||
} else {
|
||||
printf("Invalid inputs rv_idx=%d, cb_idx=%d\n", rv_idx, cb_idx);
|
||||
return SRSLTE_ERROR_INVALID_INPUTS;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
|
||||
int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
|
||||
int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint16_t *deinter, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
|
||||
{
|
||||
if (rv_idx < 4 && cb_idx < SRSLTE_NOF_TC_CB_SIZES) {
|
||||
uint32_t out_len = 3*srslte_cbsegm_cbsize(cb_idx)+12;
|
||||
uint16_t *deinter = deinterleaver[cb_idx][rv_idx];
|
||||
|
||||
|
||||
const __m128i* xPtr = (const __m128i*) input;
|
||||
const __m128i* lutPtr = (const __m128i*) deinter;
|
||||
__m128i xVal, lutVal;
|
||||
|
@ -427,6 +540,97 @@ int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len,
|
|||
}
|
||||
}
|
||||
|
||||
int srslte_rm_turbo_rx_lut_sse_8bit(int8_t *input, int8_t *output, uint16_t *deinter, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
|
||||
{
|
||||
if (rv_idx < 4 && cb_idx < SRSLTE_NOF_TC_CB_SIZES) {
|
||||
uint32_t out_len = 3*srslte_cbsegm_cbsize(cb_idx)+12;
|
||||
|
||||
const __m128i* xPtr = (const __m128i*) input;
|
||||
const __m128i* lutPtr = (const __m128i*) deinter;
|
||||
__m128i xVal, lutVal1, lutVal2;
|
||||
|
||||
/* Simplify load if we do not need to wrap (ie high rates) */
|
||||
if (in_len <= out_len) {
|
||||
for (int i=0;i<in_len/16;i++) {
|
||||
xVal = _mm_loadu_si128(xPtr);
|
||||
xPtr ++;
|
||||
lutVal1 = _mm_loadu_si128(lutPtr);
|
||||
lutPtr++;
|
||||
lutVal2 = _mm_loadu_si128(lutPtr);
|
||||
lutPtr ++;
|
||||
|
||||
for (int j=0;j<8;j++) {
|
||||
int8_t x = (int8_t) _mm_extract_epi8(xVal, j);
|
||||
uint16_t l = (uint16_t) _mm_extract_epi16(lutVal1, j);
|
||||
output[l] += x;
|
||||
}
|
||||
for (int j=0;j<8;j++) {
|
||||
int8_t x = (int8_t) _mm_extract_epi8(xVal, j+8);
|
||||
uint16_t l = (uint16_t) _mm_extract_epi16(lutVal2, j);
|
||||
output[l] += x;
|
||||
}
|
||||
}
|
||||
for (int i=16*(in_len/16);i<in_len;i++) {
|
||||
output[deinter[i%out_len]] += input[i];
|
||||
}
|
||||
} else {
|
||||
int intCnt = 16;
|
||||
int inputCnt = 0;
|
||||
int nwrapps = 0;
|
||||
while(inputCnt < in_len - 16) {
|
||||
xVal = _mm_loadu_si128(xPtr);
|
||||
xPtr ++;
|
||||
lutVal1 = _mm_loadu_si128(lutPtr);
|
||||
lutPtr++;
|
||||
lutVal2 = _mm_loadu_si128(lutPtr);
|
||||
lutPtr ++;
|
||||
|
||||
for (int j=0;j<8;j++) {
|
||||
int8_t x = (int8_t) _mm_extract_epi8(xVal, j);
|
||||
uint16_t l = (uint16_t) _mm_extract_epi16(lutVal1, j);
|
||||
output[l] += x;
|
||||
}
|
||||
for (int j=0;j<8;j++) {
|
||||
int8_t x = (int8_t) _mm_extract_epi8(xVal, j+8);
|
||||
uint16_t l = (uint16_t) _mm_extract_epi16(lutVal2, j);
|
||||
output[l] += x;
|
||||
}
|
||||
intCnt += 16;
|
||||
inputCnt += 16;
|
||||
if (intCnt >= out_len && inputCnt < in_len - 16) {
|
||||
/* Copy last elements */
|
||||
if ((out_len%16) == 12) {
|
||||
for (int j=(nwrapps+1)*out_len-12;j<(nwrapps+1)*out_len;j++) {
|
||||
output[deinter[j%out_len]] += input[j];
|
||||
inputCnt++;
|
||||
}
|
||||
} else {
|
||||
for (int j=(nwrapps+1)*out_len-4;j<(nwrapps+1)*out_len;j++) {
|
||||
output[deinter[j%out_len]] += input[j];
|
||||
inputCnt++;
|
||||
}
|
||||
}
|
||||
/* And wrap pointers */
|
||||
nwrapps++;
|
||||
intCnt = 16;
|
||||
xPtr = (const __m128i*) &input[nwrapps*out_len];
|
||||
lutPtr = (const __m128i*) deinter;
|
||||
}
|
||||
}
|
||||
for (int i=inputCnt;i<in_len;i++) {
|
||||
output[deinter[i%out_len]] += input[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return 0;
|
||||
} else {
|
||||
printf("Invalid inputs rv_idx=%d, cb_idx=%d\n", rv_idx, cb_idx);
|
||||
return SRSLTE_ERROR_INVALID_INPUTS;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -436,20 +640,18 @@ int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len,
|
|||
l = (uint16_t) _mm256_extract_epi16(lutVal, j);\
|
||||
output[l] += x;
|
||||
|
||||
|
||||
int srslte_rm_turbo_rx_lut_avx(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
|
||||
int srslte_rm_turbo_rx_lut_avx(int16_t *input, int16_t *output, uint16_t *deinter, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
|
||||
{
|
||||
if (rv_idx < 4 && cb_idx < SRSLTE_NOF_TC_CB_SIZES) {
|
||||
uint32_t out_len = 3*srslte_cbsegm_cbsize(cb_idx)+12;
|
||||
uint16_t *deinter = deinterleaver[cb_idx][rv_idx];
|
||||
|
||||
|
||||
const __m256i* xPtr = (const __m256i*) input;
|
||||
const __m256i* lutPtr = (const __m256i*) deinter;
|
||||
__m256i xVal, lutVal;
|
||||
|
||||
int16_t x;
|
||||
|
||||
int16_t x;
|
||||
uint16_t l;
|
||||
|
||||
|
||||
/* Simplify load if we do not need to wrap (ie high rates) */
|
||||
if (in_len <= out_len) {
|
||||
for (int i=0;i<in_len/16;i++) {
|
||||
|
@ -463,7 +665,7 @@ int srslte_rm_turbo_rx_lut_avx(int16_t *input, int16_t *output, uint32_t in_len,
|
|||
SAVE_OUTPUT(5);
|
||||
SAVE_OUTPUT(6);
|
||||
SAVE_OUTPUT(7);
|
||||
|
||||
|
||||
SAVE_OUTPUT(8);
|
||||
SAVE_OUTPUT(9);
|
||||
SAVE_OUTPUT(10);
|
||||
|
@ -472,21 +674,21 @@ int srslte_rm_turbo_rx_lut_avx(int16_t *input, int16_t *output, uint32_t in_len,
|
|||
SAVE_OUTPUT(13);
|
||||
SAVE_OUTPUT(14);
|
||||
SAVE_OUTPUT(15);
|
||||
|
||||
|
||||
xPtr ++;
|
||||
lutPtr ++;
|
||||
}
|
||||
for (int i=16*(in_len/16);i<in_len;i++) {
|
||||
for (int i=16*(in_len/16);i<in_len;i++) {
|
||||
output[deinter[i%out_len]] += input[i];
|
||||
}
|
||||
} else {
|
||||
int intCnt = 16;
|
||||
int inputCnt = 0;
|
||||
int nwrapps = 0;
|
||||
int nwrapps = 0;
|
||||
while(inputCnt < in_len - 16) {
|
||||
xVal = _mm256_loadu_si256(xPtr);
|
||||
lutVal = _mm256_loadu_si256(lutPtr);
|
||||
|
||||
|
||||
SAVE_OUTPUT(0);
|
||||
SAVE_OUTPUT(1);
|
||||
SAVE_OUTPUT(2);
|
||||
|
@ -495,7 +697,7 @@ int srslte_rm_turbo_rx_lut_avx(int16_t *input, int16_t *output, uint32_t in_len,
|
|||
SAVE_OUTPUT(5);
|
||||
SAVE_OUTPUT(6);
|
||||
SAVE_OUTPUT(7);
|
||||
|
||||
|
||||
SAVE_OUTPUT(8);
|
||||
SAVE_OUTPUT(9);
|
||||
SAVE_OUTPUT(10);
|
||||
|
@ -511,31 +713,180 @@ int srslte_rm_turbo_rx_lut_avx(int16_t *input, int16_t *output, uint32_t in_len,
|
|||
if (intCnt >= out_len && inputCnt < in_len - 16) {
|
||||
/* Copy last elements */
|
||||
if ((out_len%16) == 12) {
|
||||
for (int j=(nwrapps+1)*out_len-12;j<(nwrapps+1)*out_len;j++) {
|
||||
for (int j=(nwrapps+1)*out_len-12;j<(nwrapps+1)*out_len;j++) {
|
||||
output[deinter[j%out_len]] += input[j];
|
||||
inputCnt++;
|
||||
}
|
||||
} else {
|
||||
for (int j=(nwrapps+1)*out_len-4;j<(nwrapps+1)*out_len;j++) {
|
||||
for (int j=(nwrapps+1)*out_len-4;j<(nwrapps+1)*out_len;j++) {
|
||||
output[deinter[j%out_len]] += input[j];
|
||||
inputCnt++;
|
||||
}
|
||||
}
|
||||
/* And wrap pointers */
|
||||
nwrapps++;
|
||||
intCnt = 16;
|
||||
intCnt = 16;
|
||||
xPtr = (const __m256i*) &input[nwrapps*out_len];
|
||||
lutPtr = (const __m256i*) deinter;
|
||||
}
|
||||
}
|
||||
for (int i=inputCnt;i<in_len;i++) {
|
||||
}
|
||||
for (int i=inputCnt;i<in_len;i++) {
|
||||
output[deinter[i%out_len]] += input[i];
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
return 0;
|
||||
} else {
|
||||
printf("Invalid inputs rv_idx=%d, cb_idx=%d\n", rv_idx, cb_idx);
|
||||
return SRSLTE_ERROR_INVALID_INPUTS;
|
||||
return SRSLTE_ERROR_INVALID_INPUTS;
|
||||
}
|
||||
}
|
||||
|
||||
#define SAVE_OUTPUT8(j) x = (int8_t) _mm256_extract_epi8(xVal, j);\
|
||||
l = (uint16_t) _mm256_extract_epi16(lutVal1, j);\
|
||||
output[l] += x;
|
||||
|
||||
#define SAVE_OUTPUT8_2(j) x = (int8_t) _mm256_extract_epi8(xVal, j+8);\
|
||||
l = (uint16_t) _mm256_extract_epi16(lutVal2, j);\
|
||||
output[l] += x;
|
||||
|
||||
|
||||
int srslte_rm_turbo_rx_lut_avx_8bit(int8_t *input, int8_t *output, uint16_t *deinter, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
|
||||
{
|
||||
if (rv_idx < 4 && cb_idx < SRSLTE_NOF_TC_CB_SIZES) {
|
||||
uint32_t out_len = 3*srslte_cbsegm_cbsize(cb_idx)+12;
|
||||
|
||||
const __m256i* xPtr = (const __m256i*) input;
|
||||
const __m256i* lutPtr = (const __m256i*) deinter;
|
||||
__m256i xVal, lutVal1, lutVal2;
|
||||
|
||||
int8_t x;
|
||||
uint16_t l;
|
||||
|
||||
/* Simplify load if we do not need to wrap (ie high rates) */
|
||||
if (in_len <= out_len) {
|
||||
for (int i=0;i<in_len/32;i++) {
|
||||
xVal = _mm256_loadu_si256(xPtr);
|
||||
xPtr ++;
|
||||
lutVal1 = _mm256_loadu_si256(lutPtr);
|
||||
lutPtr ++;
|
||||
lutVal2 = _mm256_loadu_si256(lutPtr);
|
||||
lutPtr ++;
|
||||
|
||||
SAVE_OUTPUT8(0);
|
||||
SAVE_OUTPUT8(1);
|
||||
SAVE_OUTPUT8(2);
|
||||
SAVE_OUTPUT8(3);
|
||||
SAVE_OUTPUT8(4);
|
||||
SAVE_OUTPUT8(5);
|
||||
SAVE_OUTPUT8(6);
|
||||
SAVE_OUTPUT8(7);
|
||||
|
||||
SAVE_OUTPUT8(8);
|
||||
SAVE_OUTPUT8(9);
|
||||
SAVE_OUTPUT8(10);
|
||||
SAVE_OUTPUT8(11);
|
||||
SAVE_OUTPUT8(12);
|
||||
SAVE_OUTPUT8(13);
|
||||
SAVE_OUTPUT8(14);
|
||||
SAVE_OUTPUT8(15);
|
||||
|
||||
SAVE_OUTPUT8_2(0);
|
||||
SAVE_OUTPUT8_2(1);
|
||||
SAVE_OUTPUT8_2(2);
|
||||
SAVE_OUTPUT8_2(3);
|
||||
SAVE_OUTPUT8_2(4);
|
||||
SAVE_OUTPUT8_2(5);
|
||||
SAVE_OUTPUT8_2(6);
|
||||
SAVE_OUTPUT8_2(7);
|
||||
|
||||
SAVE_OUTPUT8_2(8);
|
||||
SAVE_OUTPUT8_2(9);
|
||||
SAVE_OUTPUT8_2(10);
|
||||
SAVE_OUTPUT8_2(11);
|
||||
SAVE_OUTPUT8_2(12);
|
||||
SAVE_OUTPUT8_2(13);
|
||||
SAVE_OUTPUT8_2(14);
|
||||
SAVE_OUTPUT8_2(15);
|
||||
|
||||
}
|
||||
for (int i=32*(in_len/32);i<in_len;i++) {
|
||||
output[deinter[i%out_len]] += input[i];
|
||||
}
|
||||
} else {
|
||||
printf("wraps not implemented!\n");
|
||||
#ifdef kk
|
||||
int intCnt = 32;
|
||||
int inputCnt = 0;
|
||||
int nwrapps = 0;
|
||||
while(inputCnt < in_len - 32) {
|
||||
xVal = _mm256_loadu_si256(xPtr);
|
||||
xPtr ++;
|
||||
lutVal1 = _mm256_loadu_si256(lutPtr);
|
||||
lutPtr ++;
|
||||
lutVal2 = _mm256_loadu_si256(lutPtr);
|
||||
lutPtr ++;
|
||||
|
||||
SAVE_OUTPUT8(0);
|
||||
SAVE_OUTPUT8(1);
|
||||
SAVE_OUTPUT8(2);
|
||||
SAVE_OUTPUT8(3);
|
||||
SAVE_OUTPUT8(4);
|
||||
SAVE_OUTPUT8(5);
|
||||
SAVE_OUTPUT8(6);
|
||||
SAVE_OUTPUT8(7);
|
||||
|
||||
SAVE_OUTPUT8(8);
|
||||
SAVE_OUTPUT8(9);
|
||||
SAVE_OUTPUT8(10);
|
||||
SAVE_OUTPUT8(11);
|
||||
SAVE_OUTPUT8(12);
|
||||
SAVE_OUTPUT8(13);
|
||||
SAVE_OUTPUT8(14);
|
||||
SAVE_OUTPUT8(15);
|
||||
|
||||
SAVE_OUTPUT8_2(0);
|
||||
SAVE_OUTPUT8_2(1);
|
||||
SAVE_OUTPUT8_2(2);
|
||||
SAVE_OUTPUT8_2(3);
|
||||
SAVE_OUTPUT8_2(4);
|
||||
SAVE_OUTPUT8_2(5);
|
||||
SAVE_OUTPUT8_2(6);
|
||||
SAVE_OUTPUT8_2(7);
|
||||
|
||||
SAVE_OUTPUT8_2(8);
|
||||
SAVE_OUTPUT8_2(9);
|
||||
SAVE_OUTPUT8_2(10);
|
||||
SAVE_OUTPUT8_2(11);
|
||||
SAVE_OUTPUT8_2(12);
|
||||
SAVE_OUTPUT8_2(13);
|
||||
SAVE_OUTPUT8_2(14);
|
||||
SAVE_OUTPUT8_2(15);
|
||||
|
||||
intCnt += 32;
|
||||
inputCnt += 32;
|
||||
if (intCnt >= out_len && inputCnt < in_len - 32) {
|
||||
printf("warning rate matching wrapping remainder %d\n", out_len%32);
|
||||
/* Copy last elements */
|
||||
for (int j=(nwrapps+1)*out_len-(out_len%32) ;j<(nwrapps+1)*out_len;j++) {
|
||||
output[deinter[j%out_len]] += input[j];
|
||||
inputCnt++;
|
||||
}
|
||||
/* And wrap pointers */
|
||||
nwrapps++;
|
||||
intCnt = 32;
|
||||
xPtr = (const __m256i*) &input[nwrapps*out_len];
|
||||
lutPtr = (const __m256i*) deinter;
|
||||
}
|
||||
}
|
||||
for (int i=inputCnt;i<in_len;i++) {
|
||||
output[deinter[i%out_len]] += input[i];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
} else {
|
||||
printf("Invalid inputs rv_idx=%d, cb_idx=%d\n", rv_idx, cb_idx);
|
||||
return SRSLTE_ERROR_INVALID_INPUTS;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -66,7 +66,16 @@ const uint32_t f2_list[SRSLTE_NOF_TC_CB_SIZES] = { 10, 12, 42, 16, 18, 20, 22, 2
|
|||
280, 142, 480, 146, 444, 120, 152, 462, 234, 158, 80, 96, 902, 166, 336,
|
||||
170, 86, 174, 176, 178, 120, 182, 184, 186, 94, 190, 480 };
|
||||
|
||||
int srslte_tc_interl_LTE_gen(srslte_tc_interl_t *h, uint32_t long_cb) {
|
||||
int srslte_tc_interl_LTE_gen(srslte_tc_interl_t *h, uint32_t long_cb)
|
||||
{
|
||||
return srslte_tc_interl_LTE_gen_interl(h, long_cb, 1);
|
||||
}
|
||||
|
||||
#define deinter(x,win) ((x%(long_cb/win))*(win)+x/(long_cb/win))
|
||||
#define inter(x,win) ((x%win)*(long_cb/win)+x/win)
|
||||
|
||||
|
||||
int srslte_tc_interl_LTE_gen_interl(srslte_tc_interl_t *h, uint32_t long_cb, uint32_t interl_win) {
|
||||
uint32_t cb_table_idx, f1, f2;
|
||||
uint64_t i, j;
|
||||
|
||||
|
@ -92,6 +101,19 @@ int srslte_tc_interl_LTE_gen(srslte_tc_interl_t *h, uint32_t long_cb) {
|
|||
h->forward[i] = (uint32_t) j;
|
||||
h->reverse[j] = (uint32_t) i;
|
||||
}
|
||||
if (interl_win != 1) {
|
||||
uint16_t *f = malloc(long_cb*sizeof(uint16_t));
|
||||
uint16_t *r = malloc(long_cb*sizeof(uint16_t));
|
||||
memcpy(f, h->forward, long_cb*sizeof(uint16_t));
|
||||
memcpy(r, h->reverse, long_cb*sizeof(uint16_t));
|
||||
for (i = 0; i < long_cb; i++) {
|
||||
h->forward[i] = deinter(f[inter(i,interl_win)],interl_win);
|
||||
h->reverse[i] = deinter(r[inter(i,interl_win)],interl_win);
|
||||
}
|
||||
free(f);
|
||||
free(r);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
|
|
@ -30,7 +30,6 @@
|
|||
#include <stdint.h>
|
||||
|
||||
#include "srslte/phy/fec/tc_interl.h"
|
||||
#include "srslte/phy/fec/turbocoder.h"
|
||||
|
||||
#define TURBO_SRSLTE_TCOD_RATE 3
|
||||
|
||||
|
|
|
@ -183,7 +183,7 @@ int main(int argc, char **argv) {
|
|||
srslte_rm_turbo_rx(buff_f, BUFFSZ, rm_bits_f, nof_e_bits, bits_f, long_cb_enc, rv_idx, 0);
|
||||
|
||||
bzero(bits2_s, long_cb_enc*sizeof(short));
|
||||
srslte_rm_turbo_rx_lut(rm_bits_s, bits2_s, nof_e_bits, cb_idx, rv_idx);
|
||||
srslte_rm_turbo_rx_lut_(rm_bits_s, bits2_s, nof_e_bits, cb_idx, rv_idx, false);
|
||||
|
||||
for (int i=0;i<long_cb_enc;i++) {
|
||||
if (bits_f[i] != bits2_s[i]) {
|
||||
|
|
|
@ -96,8 +96,15 @@ int main(int argc, char **argv) {
|
|||
}
|
||||
}
|
||||
|
||||
/* Create CRC for Transport Block, it is not currently used but it is required */
|
||||
srslte_crc_t crc_tb = {};
|
||||
if (srslte_crc_init(&crc_tb, SRSLTE_LTE_CRC24A, 24)) {
|
||||
printf("error initialising CRC\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
srslte_tcod_encode(&tcod, input_bits, output_bits, long_cb);
|
||||
srslte_tcod_encode_lut(&tcod, NULL, input_bytes, parity, len);
|
||||
srslte_tcod_encode_lut(&tcod, &crc_tb, NULL, input_bytes, parity, len, false);
|
||||
|
||||
srslte_bit_unpack_vector(parity, parity_bits, 2*(long_cb+4));
|
||||
|
||||
|
|
|
@ -50,14 +50,16 @@ int nof_cb = 1;
|
|||
int nof_iterations = MAX_ITERATIONS;
|
||||
int test_known_data = 0;
|
||||
int test_errors = 0;
|
||||
int nof_repetitions = 1;
|
||||
int nof_repetitions = 1;
|
||||
|
||||
srslte_tdec_impl_type_t tdec_type;
|
||||
|
||||
#define SNR_POINTS 4
|
||||
#define SNR_MIN 1.0
|
||||
#define SNR_MAX 8.0
|
||||
|
||||
void usage(char *prog) {
|
||||
printf("Usage: %s [nlesv]\n", prog);
|
||||
printf("Usage: %s [kcinNledts]\n", prog);
|
||||
printf(
|
||||
"\t-k Test with known data (ignores frame_length) [Default disabled]\n");
|
||||
printf("\t-c nof_cb in parallel [Default %d]\n", nof_cb);
|
||||
|
@ -66,13 +68,14 @@ void usage(char *prog) {
|
|||
printf("\t-N nof_repetitions [Default %d]\n", nof_repetitions);
|
||||
printf("\t-l frame_length [Default %d]\n", frame_length);
|
||||
printf("\t-e ebno in dB [Default scan]\n");
|
||||
printf("\t-d Decoder implementation type: 0: Generic, 1: SSE, 2: SSE-window\n");
|
||||
printf("\t-t test: check errors on exit [Default disabled]\n");
|
||||
printf("\t-s seed [Default 0=time]\n");
|
||||
}
|
||||
|
||||
void parse_args(int argc, char **argv) {
|
||||
int opt;
|
||||
while ((opt = getopt(argc, argv, "cinNlstvekt")) != -1) {
|
||||
while ((opt = getopt(argc, argv, "kcinNledts")) != -1) {
|
||||
switch (opt) {
|
||||
case 'c':
|
||||
nof_cb = atoi(argv[optind]);
|
||||
|
@ -95,6 +98,9 @@ void parse_args(int argc, char **argv) {
|
|||
case 'l':
|
||||
frame_length = atoi(argv[optind]);
|
||||
break;
|
||||
case 'd':
|
||||
tdec_type = (srslte_tdec_impl_type_t) atoi(argv[optind]);
|
||||
break;
|
||||
case 'e':
|
||||
ebno_db = atof(argv[optind]);
|
||||
break;
|
||||
|
@ -117,7 +123,7 @@ int main(int argc, char **argv) {
|
|||
float *llr;
|
||||
short *llr_s;
|
||||
uint8_t *llr_c;
|
||||
uint8_t *data_tx, *data_rx, *data_rx_bytes[SRSLTE_TDEC_MAX_NPAR], *symbols;
|
||||
uint8_t *data_tx, *data_rx, *data_rx_bytes, *symbols;
|
||||
uint32_t i, j;
|
||||
float var[SNR_POINTS];
|
||||
uint32_t snr_points;
|
||||
|
@ -159,12 +165,10 @@ int main(int argc, char **argv) {
|
|||
perror("malloc");
|
||||
exit(-1);
|
||||
}
|
||||
for (int cb=0;cb<SRSLTE_TDEC_MAX_NPAR;cb++) {
|
||||
data_rx_bytes[cb] = srslte_vec_malloc(frame_length * sizeof(uint8_t));
|
||||
if (!data_rx_bytes[cb]) {
|
||||
perror("malloc");
|
||||
exit(-1);
|
||||
}
|
||||
data_rx_bytes = srslte_vec_malloc(frame_length * sizeof(uint8_t));
|
||||
if (!data_rx_bytes) {
|
||||
perror("malloc");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
symbols = srslte_vec_malloc(coded_length * sizeof(uint8_t));
|
||||
|
@ -193,11 +197,13 @@ int main(int argc, char **argv) {
|
|||
exit(-1);
|
||||
}
|
||||
|
||||
if (srslte_tdec_init(&tdec, frame_length)) {
|
||||
if (srslte_tdec_init_manual(&tdec, frame_length, tdec_type)) {
|
||||
fprintf(stderr, "Error initiating Turbo decoder\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
srslte_tdec_force_not_sb(&tdec);
|
||||
|
||||
float ebno_inc, esno_db;
|
||||
ebno_inc = (SNR_MAX - SNR_MIN) / SNR_POINTS;
|
||||
if (ebno_db == 100.0) {
|
||||
|
@ -246,7 +252,7 @@ int main(int argc, char **argv) {
|
|||
}
|
||||
|
||||
/* decoder */
|
||||
srslte_tdec_reset(&tdec, frame_length);
|
||||
srslte_tdec_new_cb(&tdec, frame_length);
|
||||
|
||||
uint32_t t;
|
||||
if (nof_iterations == -1) {
|
||||
|
@ -255,36 +261,21 @@ int main(int argc, char **argv) {
|
|||
t = nof_iterations;
|
||||
}
|
||||
|
||||
int16_t *input[SRSLTE_TDEC_MAX_NPAR];
|
||||
uint8_t *output[SRSLTE_TDEC_MAX_NPAR];
|
||||
|
||||
for (int n=0;n<SRSLTE_TDEC_MAX_NPAR;n++) {
|
||||
if (n < nof_cb) {
|
||||
input[n] = llr_s;
|
||||
output[n] = data_rx_bytes[n];
|
||||
} else {
|
||||
input[n] = NULL;
|
||||
output[n] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday(&tdata[1], NULL);
|
||||
for (int k=0;k<nof_repetitions;k++) {
|
||||
srslte_tdec_run_all_par(&tdec, input, output, t, frame_length);
|
||||
srslte_tdec_run_all(&tdec, llr_s, data_rx_bytes, t, frame_length);
|
||||
}
|
||||
gettimeofday(&tdata[2], NULL);
|
||||
get_time_interval(tdata);
|
||||
mean_usec = (float) mean_usec * 0.9 + (float) (tdata[0].tv_usec/nof_repetitions) * 0.1;
|
||||
mean_usec = (tdata[0].tv_sec*1e6+tdata[0].tv_usec)/nof_repetitions;
|
||||
|
||||
frame_cnt++;
|
||||
uint32_t errors_this = 0;
|
||||
for (int cb=0;cb<nof_cb;cb++) {
|
||||
srslte_bit_unpack_vector(data_rx_bytes[cb], data_rx, frame_length);
|
||||
|
||||
errors_this=srslte_bit_diff(data_tx, data_rx, frame_length);
|
||||
//printf("error[%d]=%d\n", cb, errors_this);
|
||||
errors += errors_this;
|
||||
}
|
||||
srslte_bit_unpack_vector(data_rx_bytes, data_rx, frame_length);
|
||||
|
||||
errors_this=srslte_bit_diff(data_tx, data_rx, frame_length);
|
||||
//printf("error[%d]=%d\n", cb, errors_this);
|
||||
errors += errors_this;
|
||||
printf("Eb/No: %2.2f %10d/%d ", SNR_MIN + i * ebno_inc, frame_cnt, nof_frames);
|
||||
printf("BER: %.2e ", (float) errors / (nof_cb*frame_cnt * frame_length));
|
||||
printf("%3.1f Mbps (%6.2f usec)", (float) (nof_cb*frame_length) / mean_usec, mean_usec);
|
||||
|
@ -301,10 +292,8 @@ int main(int argc, char **argv) {
|
|||
}
|
||||
}
|
||||
|
||||
for (int cb=0;cb<SRSLTE_TDEC_MAX_NPAR;cb++) {
|
||||
if (data_rx_bytes[cb]) {
|
||||
free(data_rx_bytes[cb]);
|
||||
}
|
||||
if (data_rx_bytes) {
|
||||
free(data_rx_bytes);
|
||||
}
|
||||
free(data_tx);
|
||||
free(symbols);
|
||||
|
|
|
@ -193,7 +193,13 @@ int srslte_tcod_encode(srslte_tcod_t *h, uint8_t *input, uint8_t *output, uint32
|
|||
}
|
||||
|
||||
/* Expects bytes and produces bytes. The systematic and parity bits are interlaced in the output */
|
||||
int srslte_tcod_encode_lut(srslte_tcod_t *h, srslte_crc_t *crc, uint8_t *input, uint8_t *parity, uint32_t cblen_idx)
|
||||
int srslte_tcod_encode_lut(srslte_tcod_t *h,
|
||||
srslte_crc_t *crc_tb,
|
||||
srslte_crc_t *crc_cb,
|
||||
uint8_t *input,
|
||||
uint8_t *parity,
|
||||
uint32_t cblen_idx,
|
||||
bool last_cb)
|
||||
{
|
||||
if (cblen_idx < 188) {
|
||||
uint32_t long_cb = (uint32_t) srslte_cbsegm_cbsize(cblen_idx);
|
||||
|
@ -204,20 +210,24 @@ int srslte_tcod_encode_lut(srslte_tcod_t *h, srslte_crc_t *crc, uint8_t *input,
|
|||
}
|
||||
|
||||
/* Reset CRC */
|
||||
if (crc) {
|
||||
srslte_crc_set_init(crc, 0);
|
||||
if (crc_cb) {
|
||||
srslte_crc_set_init(crc_cb, 0);
|
||||
}
|
||||
|
||||
/* Parity bits for the 1st constituent encoders */
|
||||
uint8_t state0 = 0;
|
||||
if (crc) {
|
||||
if (crc_cb) {
|
||||
int block_size_nocrc = (long_cb - crc_cb->order - ((last_cb) ? crc_tb->order : 0)) / 8;
|
||||
|
||||
/* if CRC pointer is given */
|
||||
for (int i = 0; i < (long_cb - crc->order) / 8; i++) {
|
||||
for (int i = 0; i < block_size_nocrc; i++) {
|
||||
uint8_t in = input[i];
|
||||
|
||||
/* Put byte in CRC and save latest checksum */
|
||||
srslte_crc_checksum_put_byte(crc, in);
|
||||
/* Put byte in TB CRC and save latest checksum */
|
||||
srslte_crc_checksum_put_byte(crc_tb, in);
|
||||
|
||||
/* Put byte in CB CRC and save latest checksum */
|
||||
srslte_crc_checksum_put_byte(crc_cb, in);
|
||||
|
||||
/* Run actual encoder */
|
||||
tcod_lut_t l = tcod_lut[state0][in];
|
||||
|
@ -225,10 +235,27 @@ int srslte_tcod_encode_lut(srslte_tcod_t *h, srslte_crc_t *crc, uint8_t *input,
|
|||
state0 = l.next_state;
|
||||
}
|
||||
|
||||
uint32_t checksum = (uint32_t) srslte_crc_checksum_get(crc);
|
||||
for (int i = 0; i < crc->order / 8; i++) {
|
||||
int mask_shift = 8 * (crc->order / 8 - i - 1);
|
||||
int idx = (long_cb - crc->order) / 8 + i;
|
||||
if (last_cb) {
|
||||
uint32_t checksum = (uint32_t) srslte_crc_checksum_get(crc_tb);
|
||||
for (int i = 0; i < crc_tb->order / 8; i++) {
|
||||
int mask_shift = 8 * (crc_tb->order / 8 - i - 1);
|
||||
int idx = block_size_nocrc + i;
|
||||
uint8_t in = (uint8_t) ((checksum >> mask_shift) & 0xff);
|
||||
|
||||
/* Put byte in CB CRC and save latest checksum */
|
||||
srslte_crc_checksum_put_byte(crc_cb, in);
|
||||
|
||||
input[idx] = in;
|
||||
tcod_lut_t l = tcod_lut[state0][in];
|
||||
parity[idx] = l.output;
|
||||
state0 = l.next_state;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t checksum = (uint32_t) srslte_crc_checksum_get(crc_cb);
|
||||
for (int i = 0; i < crc_cb->order / 8; i++) {
|
||||
int mask_shift = 8 * (crc_cb->order / 8 - i - 1);
|
||||
int idx = (long_cb - crc_cb->order) / 8 + i;
|
||||
uint8_t in = (uint8_t) ((checksum >> mask_shift) & 0xff);
|
||||
|
||||
input[idx] = in;
|
||||
|
@ -239,11 +266,31 @@ int srslte_tcod_encode_lut(srslte_tcod_t *h, srslte_crc_t *crc, uint8_t *input,
|
|||
|
||||
} else {
|
||||
/* No CRC given */
|
||||
for (uint32_t i = 0; i < long_cb / 8; i++) {
|
||||
tcod_lut_t l = tcod_lut[state0][input[i]];
|
||||
int block_size_nocrc = (long_cb - ((last_cb) ? crc_tb->order : 0)) / 8;
|
||||
|
||||
for (uint32_t i = 0; i < block_size_nocrc; i++) {
|
||||
uint8_t in = input[i];
|
||||
|
||||
srslte_crc_checksum_put_byte(crc_tb, in);
|
||||
|
||||
tcod_lut_t l = tcod_lut[state0][in];
|
||||
parity[i] = l.output;
|
||||
state0 = l.next_state;
|
||||
}
|
||||
|
||||
if (last_cb) {
|
||||
uint32_t checksum = (uint32_t) srslte_crc_checksum_get(crc_tb);
|
||||
for (int i = 0; i < crc_tb->order / 8; i++) {
|
||||
int mask_shift = 8 * (crc_tb->order / 8 - i - 1);
|
||||
int idx = block_size_nocrc + i;
|
||||
uint8_t in = (uint8_t) ((checksum >> mask_shift) & 0xff);
|
||||
|
||||
input[idx] = in;
|
||||
tcod_lut_t l = tcod_lut[state0][in];
|
||||
parity[idx] = l.output;
|
||||
state0 = l.next_state;
|
||||
}
|
||||
}
|
||||
}
|
||||
parity[long_cb / 8] = 0; // will put tail here later
|
||||
|
||||
|
|
|
@ -24,151 +24,543 @@
|
|||
*
|
||||
*/
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "srslte/phy/fec/turbodecoder.h"
|
||||
#include "srslte/phy/fec/turbodecoder_gen.h"
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include "srslte/phy/fec/turbodecoder_simd.h"
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <strings.h>
|
||||
#include <srslte/srslte.h>
|
||||
|
||||
#include "srslte/phy/utils/vector.h"
|
||||
#include "srslte/phy/fec/turbodecoder.h"
|
||||
|
||||
#define debug_enabled 0
|
||||
|
||||
/* Generic (no SSE) implementation */
|
||||
#include "srslte/phy/fec/turbodecoder_gen.h"
|
||||
srslte_tdec_16bit_impl_t gen_impl = {
|
||||
tdec_gen_init,
|
||||
tdec_gen_free,
|
||||
tdec_gen_dec,
|
||||
tdec_gen_extract_input,
|
||||
tdec_gen_decision_byte
|
||||
};
|
||||
|
||||
/* SSE no-window implementation */
|
||||
#include "srslte/phy/fec/turbodecoder_sse.h"
|
||||
srslte_tdec_16bit_impl_t sse_impl = {
|
||||
tdec_sse_init,
|
||||
tdec_sse_free,
|
||||
tdec_sse_dec,
|
||||
tdec_sse_extract_input,
|
||||
tdec_sse_decision_byte
|
||||
};
|
||||
|
||||
/* SSE window implementation */
|
||||
#ifdef LV_HAVE_SSE
|
||||
#define WINIMP_IS_SSE16
|
||||
#include "srslte/phy/fec/turbodecoder_win.h"
|
||||
#undef WINIMP_IS_SSE16
|
||||
|
||||
srslte_tdec_16bit_impl_t sse16_win_impl = {
|
||||
tdec_winsse16_init,
|
||||
tdec_winsse16_free,
|
||||
tdec_winsse16_dec,
|
||||
tdec_winsse16_extract_input,
|
||||
tdec_winsse16_decision_byte
|
||||
};
|
||||
#endif
|
||||
|
||||
/* AVX window implementation */
|
||||
#ifdef LV_HAVE_AVX
|
||||
#define WINIMP_IS_AVX16
|
||||
#include "srslte/phy/fec/turbodecoder_win.h"
|
||||
#undef WINIMP_IS_AVX16
|
||||
srslte_tdec_16bit_impl_t avx16_win_impl = {
|
||||
tdec_winavx16_init,
|
||||
tdec_winavx16_free,
|
||||
tdec_winavx16_dec,
|
||||
tdec_winavx16_extract_input,
|
||||
tdec_winavx16_decision_byte
|
||||
};
|
||||
#endif
|
||||
|
||||
/* SSE window implementation */
|
||||
#ifdef LV_HAVE_SSE
|
||||
#define WINIMP_IS_SSE8
|
||||
#include "srslte/phy/fec/turbodecoder_win.h"
|
||||
#undef WINIMP_IS_SSE8
|
||||
|
||||
srslte_tdec_8bit_impl_t sse8_win_impl = {
|
||||
tdec_winsse8_init,
|
||||
tdec_winsse8_free,
|
||||
tdec_winsse8_dec,
|
||||
tdec_winsse8_extract_input,
|
||||
tdec_winsse8_decision_byte
|
||||
};
|
||||
#endif
|
||||
|
||||
/* AVX window implementation */
|
||||
#ifdef LV_HAVE_AVX
|
||||
#define WINIMP_IS_AVX8
|
||||
#include "srslte/phy/fec/turbodecoder_win.h"
|
||||
#undef WINIMP_IS_AVX8
|
||||
srslte_tdec_8bit_impl_t avx8_win_impl = {
|
||||
tdec_winavx8_init,
|
||||
tdec_winavx8_free,
|
||||
tdec_winavx8_dec,
|
||||
tdec_winavx8_extract_input,
|
||||
tdec_winavx8_decision_byte
|
||||
};
|
||||
#endif
|
||||
|
||||
#define AUTO_16_SSE 0
|
||||
#define AUTO_16_SSEWIN 1
|
||||
#define AUTO_16_AVXWIN 2
|
||||
#define AUTO_8_SSEWIN 0
|
||||
#define AUTO_8_AVXWIN 1
|
||||
|
||||
|
||||
// Include interfaces for 8 and 16 bit decoder implementations
|
||||
#define LLR_IS_8BIT
|
||||
#include "srslte/phy/fec/turbodecoder_iter.h"
|
||||
#undef LLR_IS_8BIT
|
||||
|
||||
#define LLR_IS_16BIT
|
||||
#include "srslte/phy/fec/turbodecoder_iter.h"
|
||||
#undef LLR_IS_16BIT
|
||||
|
||||
|
||||
int srslte_tdec_init(srslte_tdec_t * h, uint32_t max_long_cb) {
|
||||
#ifdef LV_HAVE_SSE
|
||||
return srslte_tdec_simd_init(&h->tdec_simd, SRSLTE_TDEC_MAX_NPAR, max_long_cb);
|
||||
#else
|
||||
h->input_conv = srslte_vec_malloc(sizeof(float) * (3*max_long_cb+12));
|
||||
if (!h->input_conv) {
|
||||
perror("malloc");
|
||||
return -1;
|
||||
}
|
||||
return srslte_tdec_gen_init(&h->tdec_gen, max_long_cb);
|
||||
#endif
|
||||
return srslte_tdec_init_manual(h, max_long_cb, SRSLTE_TDEC_AUTO);
|
||||
}
|
||||
|
||||
void srslte_tdec_free(srslte_tdec_t * h) {
|
||||
#ifdef LV_HAVE_SSE
|
||||
srslte_tdec_simd_free(&h->tdec_simd);
|
||||
#else
|
||||
uint32_t interleaver_idx(uint32_t nof_subblocks) {
|
||||
switch (nof_subblocks) {
|
||||
case 32:
|
||||
return 3;
|
||||
case 16:
|
||||
return 2;
|
||||
case 8:
|
||||
return 1;
|
||||
case 1:
|
||||
return 0;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Initializes the turbo decoder object */
|
||||
int srslte_tdec_init_manual(srslte_tdec_t * h, uint32_t max_long_cb, srslte_tdec_impl_type_t dec_type)
|
||||
{
|
||||
int ret = -1;
|
||||
bzero(h, sizeof(srslte_tdec_t));
|
||||
uint32_t len = max_long_cb + SRSLTE_TCOD_TOTALTAIL;
|
||||
|
||||
h->dec_type = dec_type;
|
||||
|
||||
// Set manual
|
||||
switch(dec_type) {
|
||||
case SRSLTE_TDEC_AUTO:
|
||||
break;
|
||||
case SRSLTE_TDEC_SSE:
|
||||
h->dec16[0] = &sse_impl;
|
||||
h->current_llr_type = SRSLTE_TDEC_16;
|
||||
break;
|
||||
case SRSLTE_TDEC_SSE_WINDOW:
|
||||
h->dec16[0] = &sse16_win_impl;
|
||||
h->current_llr_type = SRSLTE_TDEC_16;
|
||||
break;
|
||||
case SRSLTE_TDEC_GENERIC:
|
||||
h->dec16[0] = &gen_impl;
|
||||
h->current_llr_type = SRSLTE_TDEC_16;
|
||||
break;
|
||||
case SRSLTE_TDEC_SSE8_WINDOW:
|
||||
h->dec8[0] = &sse8_win_impl;
|
||||
h->current_llr_type = SRSLTE_TDEC_8;
|
||||
break;
|
||||
#ifdef LV_HAVE_AVX
|
||||
case SRSLTE_TDEC_AVX_WINDOW:
|
||||
h->dec16[0] = &avx16_win_impl;
|
||||
h->current_llr_type = SRSLTE_TDEC_16;
|
||||
break;
|
||||
case SRSLTE_TDEC_AVX8_WINDOW:
|
||||
h->dec8[0] = &avx8_win_impl;
|
||||
h->current_llr_type = SRSLTE_TDEC_8;
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
fprintf(stderr, "Error decoder %d not supported\n", dec_type);
|
||||
goto clean_and_exit;
|
||||
}
|
||||
|
||||
h->max_long_cb = max_long_cb;
|
||||
|
||||
h->app1 = srslte_vec_malloc(sizeof(int16_t) * len);
|
||||
if (!h->app1) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->app2 = srslte_vec_malloc(sizeof(int16_t) * len);
|
||||
if (!h->app2) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->ext1 = srslte_vec_malloc(sizeof(int16_t) * len);
|
||||
if (!h->ext1) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->ext2 = srslte_vec_malloc(sizeof(int16_t) * len);
|
||||
if (!h->ext2) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->syst0 = srslte_vec_malloc(sizeof(int16_t) * len);
|
||||
if (!h->syst0) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->parity0 = srslte_vec_malloc(sizeof(int16_t) * len);
|
||||
if (!h->parity0) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->parity1 = srslte_vec_malloc(sizeof(int16_t) * len);
|
||||
if (!h->parity1) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->input_conv = srslte_vec_malloc(sizeof(int16_t) * (len * 3+32*3));
|
||||
if (!h->input_conv) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
|
||||
if (dec_type == SRSLTE_TDEC_AUTO) {
|
||||
h->dec16[AUTO_16_SSE] = &sse_impl;
|
||||
h->dec16[AUTO_16_SSEWIN] = &sse16_win_impl;
|
||||
h->dec8[AUTO_8_SSEWIN] = &sse8_win_impl;
|
||||
#ifdef LV_HAVE_AVX
|
||||
h->dec16[AUTO_16_AVXWIN] = &avx16_win_impl;
|
||||
h->dec8[AUTO_8_AVXWIN] = &avx8_win_impl;
|
||||
#endif
|
||||
|
||||
for (int td=0;td<SRSLTE_TDEC_NOF_AUTO_MODES_16;td++) {
|
||||
if (h->dec16[td]) {
|
||||
if ((h->nof_blocks16[td] = h->dec16[td]->tdec_init(&h->dec16_hdlr[td], h->max_long_cb))<0) {
|
||||
goto clean_and_exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int td=0;td<SRSLTE_TDEC_NOF_AUTO_MODES_8;td++) {
|
||||
if (h->dec8[td]) {
|
||||
if ((h->nof_blocks8[td] = h->dec8[td]->tdec_init(&h->dec8_hdlr[td], h->max_long_cb))<0) {
|
||||
goto clean_and_exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute 1 interleaver for each possible nof_subblocks (1, 8, 16 or 32)
|
||||
for (int s=0;s<4;s++) {
|
||||
for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
|
||||
if (srslte_tc_interl_init(&h->interleaver[s][i], srslte_cbsegm_cbsize(i)) < 0) {
|
||||
goto clean_and_exit;
|
||||
}
|
||||
srslte_tc_interl_LTE_gen_interl(&h->interleaver[s][i], srslte_cbsegm_cbsize(i), s?(8<<(s-1)):1);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
uint32_t nof_subblocks;
|
||||
if (dec_type < SRSLTE_TDEC_SSE8_WINDOW) {
|
||||
if ((h->nof_blocks16[0] = h->dec16[0]->tdec_init(&h->dec16_hdlr[0], h->max_long_cb))<0) {
|
||||
goto clean_and_exit;
|
||||
}
|
||||
nof_subblocks = h->nof_blocks16[0];
|
||||
} else {
|
||||
if ((h->nof_blocks8[0] = h->dec8[0]->tdec_init(&h->dec8_hdlr[0], h->max_long_cb))<0) {
|
||||
goto clean_and_exit;
|
||||
}
|
||||
nof_subblocks = h->nof_blocks8[0];
|
||||
}
|
||||
for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
|
||||
if (srslte_tc_interl_init(&h->interleaver[interleaver_idx(nof_subblocks)][i], srslte_cbsegm_cbsize(i)) < 0) {
|
||||
goto clean_and_exit;
|
||||
}
|
||||
srslte_tc_interl_LTE_gen_interl(&h->interleaver[interleaver_idx(nof_subblocks)][i], srslte_cbsegm_cbsize(i), nof_subblocks);
|
||||
}
|
||||
}
|
||||
|
||||
h->current_cbidx = -1;
|
||||
ret = 0;
|
||||
|
||||
clean_and_exit:
|
||||
if (ret == -1) {
|
||||
srslte_tdec_free(h);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void srslte_tdec_free(srslte_tdec_t * h)
|
||||
{
|
||||
if (h->app1) {
|
||||
free(h->app1);
|
||||
}
|
||||
if (h->app2) {
|
||||
free(h->app2);
|
||||
}
|
||||
if (h->ext1) {
|
||||
free(h->ext1);
|
||||
}
|
||||
if (h->ext2) {
|
||||
free(h->ext2);
|
||||
}
|
||||
if (h->syst0) {
|
||||
free(h->syst0);
|
||||
}
|
||||
if (h->parity0) {
|
||||
free(h->parity0);
|
||||
}
|
||||
if (h->parity1) {
|
||||
free(h->parity1);
|
||||
}
|
||||
if (h->input_conv) {
|
||||
free(h->input_conv);
|
||||
}
|
||||
srslte_tdec_gen_free(&h->tdec_gen);
|
||||
#endif
|
||||
|
||||
for (int td=0;td<SRSLTE_TDEC_NOF_AUTO_MODES_8;td++) {
|
||||
if (h->dec8[td] && h->dec8_hdlr[td]) {
|
||||
h->dec8[td]->tdec_free(h->dec8_hdlr[td]);
|
||||
}
|
||||
}
|
||||
for (int td=0;td<SRSLTE_TDEC_NOF_AUTO_MODES_16;td++) {
|
||||
if (h->dec16[td] && h->dec16_hdlr[td]) {
|
||||
h->dec16[td]->tdec_free(h->dec16_hdlr[td]);
|
||||
}
|
||||
}
|
||||
for (int s=0;s<4;s++) {
|
||||
for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
|
||||
srslte_tc_interl_free(&h->interleaver[s][i]);
|
||||
}
|
||||
}
|
||||
|
||||
bzero(h, sizeof(srslte_tdec_t));
|
||||
}
|
||||
|
||||
int srslte_tdec_reset(srslte_tdec_t * h, uint32_t long_cb) {
|
||||
#ifdef LV_HAVE_SSE
|
||||
return srslte_tdec_simd_reset(&h->tdec_simd, long_cb);
|
||||
#else
|
||||
return srslte_tdec_gen_reset(&h->tdec_gen, long_cb);
|
||||
#endif
|
||||
void srslte_tdec_force_not_sb(srslte_tdec_t *h) {
|
||||
h->force_not_sb = true;
|
||||
}
|
||||
|
||||
int srslte_tdec_reset_cb(srslte_tdec_t * h, uint32_t cb_idx) {
|
||||
#ifdef LV_HAVE_SSE
|
||||
return srslte_tdec_simd_reset_cb(&h->tdec_simd, cb_idx);
|
||||
#else
|
||||
return srslte_tdec_gen_reset(&h->tdec_gen, h->tdec_gen.current_cb_len);
|
||||
#endif
|
||||
}
|
||||
|
||||
int srslte_tdec_get_nof_iterations_cb(srslte_tdec_t * h, uint32_t cb_idx)
|
||||
static void tdec_decision_byte(srslte_tdec_t * h, uint8_t *output)
|
||||
{
|
||||
#ifdef LV_HAVE_SSE
|
||||
return srslte_tdec_simd_get_nof_iterations_cb(&h->tdec_simd, cb_idx);
|
||||
#else
|
||||
return h->tdec_gen.n_iter;
|
||||
#endif
|
||||
if (h->current_llr_type == SRSLTE_TDEC_16) {
|
||||
h->dec16[h->current_dec]->tdec_decision_byte(!(h->n_iter%2)?h->app1:h->ext1, output, h->current_long_cb);
|
||||
} else {
|
||||
h->dec8[h->current_dec]->tdec_decision_byte(!(h->n_iter%2)?(int8_t*)h->app1:(int8_t*)h->ext1, output, h->current_long_cb);
|
||||
}
|
||||
}
|
||||
|
||||
void srslte_tdec_iteration_par(srslte_tdec_t * h, int16_t* input[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb) {
|
||||
#ifdef LV_HAVE_SSE
|
||||
srslte_tdec_simd_iteration(&h->tdec_simd, input, long_cb);
|
||||
#else
|
||||
srslte_vec_convert_if(input[0], 0.01, h->input_conv, 3*long_cb+12);
|
||||
srslte_tdec_gen_iteration(&h->tdec_gen, h->input_conv, long_cb);
|
||||
#endif
|
||||
}
|
||||
|
||||
void srslte_tdec_iteration(srslte_tdec_t * h, int16_t* input, uint32_t long_cb) {
|
||||
int16_t *input_par[SRSLTE_TDEC_MAX_NPAR];
|
||||
input_par[0] = input;
|
||||
return srslte_tdec_iteration_par(h, input_par, long_cb);
|
||||
}
|
||||
|
||||
void srslte_tdec_decision_par(srslte_tdec_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb) {
|
||||
#ifdef LV_HAVE_SSE
|
||||
return srslte_tdec_simd_decision(&h->tdec_simd, output, long_cb);
|
||||
#else
|
||||
return srslte_tdec_gen_decision(&h->tdec_gen, output[0], long_cb);
|
||||
#endif
|
||||
}
|
||||
|
||||
uint32_t srslte_tdec_get_nof_parallel(srslte_tdec_t *h) {
|
||||
#ifdef LV_HAVE_AVX2
|
||||
return 2;
|
||||
#else
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
void srslte_tdec_decision(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) {
|
||||
uint8_t *output_par[SRSLTE_TDEC_MAX_NPAR];
|
||||
output_par[0] = output;
|
||||
srslte_tdec_decision_par(h, output_par, long_cb);
|
||||
}
|
||||
|
||||
void srslte_tdec_decision_byte_par(srslte_tdec_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb) {
|
||||
#ifdef LV_HAVE_SSE
|
||||
srslte_tdec_simd_decision_byte(&h->tdec_simd, output, long_cb);
|
||||
#else
|
||||
srslte_tdec_gen_decision_byte(&h->tdec_gen, output[0], long_cb);
|
||||
#endif
|
||||
}
|
||||
|
||||
void srslte_tdec_decision_byte_par_cb(srslte_tdec_t * h, uint8_t *output, uint32_t cb_idx, uint32_t long_cb) {
|
||||
#ifdef LV_HAVE_SSE
|
||||
srslte_tdec_simd_decision_byte_cb(&h->tdec_simd, output, cb_idx, long_cb);
|
||||
#else
|
||||
srslte_tdec_gen_decision_byte(&h->tdec_gen, output, long_cb);
|
||||
#endif
|
||||
}
|
||||
|
||||
void srslte_tdec_decision_byte(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) {
|
||||
uint8_t *output_par[SRSLTE_TDEC_MAX_NPAR];
|
||||
output_par[0] = output;
|
||||
srslte_tdec_decision_byte_par(h, output_par, long_cb);
|
||||
}
|
||||
|
||||
int srslte_tdec_run_all_par(srslte_tdec_t * h, int16_t * input[SRSLTE_TDEC_MAX_NPAR],
|
||||
uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
|
||||
uint32_t nof_iterations, uint32_t long_cb) {
|
||||
#ifdef LV_HAVE_SSE
|
||||
return srslte_tdec_simd_run_all(&h->tdec_simd, input, output, nof_iterations, long_cb);
|
||||
#else
|
||||
srslte_vec_convert_if(input[0], 0.01, h->input_conv, 3*long_cb+12);
|
||||
return srslte_tdec_gen_run_all(&h->tdec_gen, h->input_conv, output[0], nof_iterations, long_cb);
|
||||
#endif
|
||||
}
|
||||
|
||||
int srslte_tdec_run_all(srslte_tdec_t * h, int16_t * input, uint8_t *output, uint32_t nof_iterations, uint32_t long_cb)
|
||||
/* Returns number of subblocks in automatic mode for this long_cb */
|
||||
uint32_t srslte_tdec_autoimp_get_subblocks(uint32_t long_cb)
|
||||
{
|
||||
uint8_t *output_par[SRSLTE_TDEC_MAX_NPAR];
|
||||
output_par[0] = output;
|
||||
int16_t *input_par[SRSLTE_TDEC_MAX_NPAR];
|
||||
input_par[0] = input;
|
||||
|
||||
return srslte_tdec_run_all_par(h, input_par, output_par, nof_iterations, long_cb);
|
||||
#ifdef LV_HAVE_AVX
|
||||
if (!(long_cb%16) && long_cb > 800) {
|
||||
return 16;
|
||||
} else
|
||||
#endif
|
||||
if (!(long_cb%8) && long_cb > 400) {
|
||||
return 8;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int tdec_sb_idx(uint32_t long_cb) {
|
||||
uint32_t nof_sb = srslte_tdec_autoimp_get_subblocks(long_cb);
|
||||
switch(nof_sb) {
|
||||
case 16:
|
||||
return AUTO_16_AVXWIN;
|
||||
case 8:
|
||||
return AUTO_16_SSEWIN;
|
||||
case 0:
|
||||
return AUTO_16_SSE;
|
||||
}
|
||||
fprintf(stderr, "Error in tdec_sb_idx() invalid nof_sb=%d\n", nof_sb);
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t srslte_tdec_autoimp_get_subblocks_8bit(uint32_t long_cb)
|
||||
{
|
||||
#ifdef LV_HAVE_AVX
|
||||
if (!(long_cb%32) && long_cb > 2048) {
|
||||
return 32;
|
||||
} else
|
||||
#endif
|
||||
if (!(long_cb%16) && long_cb > 800) {
|
||||
return 16;
|
||||
} else if (!(long_cb%8) && long_cb > 400) {
|
||||
return 8;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int tdec_sb_idx_8(uint32_t long_cb) {
|
||||
uint32_t nof_sb = srslte_tdec_autoimp_get_subblocks_8bit(long_cb);
|
||||
switch(nof_sb) {
|
||||
case 32:
|
||||
return AUTO_8_AVXWIN;
|
||||
case 16:
|
||||
return AUTO_8_SSEWIN;
|
||||
case 8:
|
||||
return 10+AUTO_16_SSEWIN;
|
||||
case 0:
|
||||
return 10+AUTO_16_SSE;
|
||||
}
|
||||
fprintf(stderr, "Error in tdec_sb_idx_8() invalid nof_sb=%d\n", nof_sb);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// TODO: Implement SSE version. Don't really a problem since this only called at very low rates
|
||||
static void convert_8_to_16(int8_t *in, int16_t *out, uint32_t len)
|
||||
{
|
||||
for (int i=0;i<len;i++) {
|
||||
out[i] = (int16_t) in[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void convert_16_to_8(int16_t *in, int8_t *out, uint32_t len)
|
||||
{
|
||||
for (int i=0;i<len;i++) {
|
||||
out[i] = (int8_t) in[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void tdec_iteration_8(srslte_tdec_t * h, int8_t * input)
|
||||
{
|
||||
// Select decoder if in auto mode
|
||||
if (h->dec_type == SRSLTE_TDEC_AUTO) {
|
||||
h->current_llr_type = SRSLTE_TDEC_8;
|
||||
h->current_dec = tdec_sb_idx_8(h->current_long_cb);
|
||||
h->current_inter_idx = interleaver_idx(h->nof_blocks8[h->current_dec]);
|
||||
|
||||
// If long_cb is not multiple of any 8-bit decoder, use a 16-bit decoder and do type conversion
|
||||
if (h->current_dec >= 10) {
|
||||
h->current_llr_type = SRSLTE_TDEC_16;
|
||||
h->current_dec -= 10;
|
||||
h->current_inter_idx = interleaver_idx(h->nof_blocks16[h->current_dec]);
|
||||
}
|
||||
} else {
|
||||
h->current_dec = 0;
|
||||
}
|
||||
|
||||
if (h->current_llr_type == SRSLTE_TDEC_16) {
|
||||
if (!h->n_iter) {
|
||||
convert_8_to_16(input, h->input_conv, 3*h->current_long_cb+12);
|
||||
}
|
||||
run_tdec_iteration_16bit(h, h->input_conv);
|
||||
} else {
|
||||
run_tdec_iteration_8bit(h, input);
|
||||
}
|
||||
}
|
||||
|
||||
static void tdec_iteration_16(srslte_tdec_t * h, int16_t * input)
|
||||
{
|
||||
// Select decoder if in auto mode
|
||||
if (h->dec_type == SRSLTE_TDEC_AUTO) {
|
||||
h->current_llr_type = SRSLTE_TDEC_16;
|
||||
h->current_dec = tdec_sb_idx(h->current_long_cb);
|
||||
} else {
|
||||
h->current_dec = 0;
|
||||
}
|
||||
h->current_inter_idx = interleaver_idx(h->nof_blocks16[h->current_dec]);
|
||||
|
||||
if (h->current_llr_type == SRSLTE_TDEC_8) {
|
||||
|
||||
h->current_inter_idx = interleaver_idx(h->nof_blocks8[h->current_dec]);
|
||||
|
||||
if (!h->n_iter) {
|
||||
convert_16_to_8(input, h->input_conv, 3*h->current_long_cb+12);
|
||||
}
|
||||
run_tdec_iteration_8bit(h, h->input_conv);
|
||||
} else {
|
||||
run_tdec_iteration_16bit(h, input);
|
||||
}
|
||||
}
|
||||
|
||||
/* Resets the decoder and sets the codeblock length */
|
||||
int srslte_tdec_new_cb(srslte_tdec_t * h, uint32_t long_cb)
|
||||
{
|
||||
if (long_cb > h->max_long_cb) {
|
||||
fprintf(stderr, "TDEC was initialized for max_long_cb=%d\n",
|
||||
h->max_long_cb);
|
||||
return -1;
|
||||
}
|
||||
|
||||
h->n_iter = 0;
|
||||
h->current_long_cb = long_cb;
|
||||
h->current_cbidx = srslte_cbsegm_cbindex(long_cb);
|
||||
if (h->current_cbidx < 0) {
|
||||
fprintf(stderr, "Invalid CB length %d\n", long_cb);
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void srslte_tdec_iteration(srslte_tdec_t * h, int16_t * input, uint8_t *output)
|
||||
{
|
||||
if (h->current_cbidx >= 0) {
|
||||
tdec_iteration_16(h, input);
|
||||
tdec_decision_byte(h, output);
|
||||
}
|
||||
}
|
||||
|
||||
/* Runs nof_iterations iterations and decides the output bits */
|
||||
int srslte_tdec_run_all(srslte_tdec_t * h, int16_t * input, uint8_t *output,
|
||||
uint32_t nof_iterations, uint32_t long_cb)
|
||||
{
|
||||
if (srslte_tdec_new_cb(h, long_cb)) {
|
||||
return SRSLTE_ERROR;
|
||||
}
|
||||
|
||||
do {
|
||||
tdec_iteration_16(h, input);
|
||||
} while (h->n_iter < nof_iterations);
|
||||
|
||||
tdec_decision_byte(h, output);
|
||||
|
||||
return SRSLTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
void srslte_tdec_iteration_8bit(srslte_tdec_t * h, int8_t * input, uint8_t *output)
|
||||
{
|
||||
if (h->current_cbidx >= 0) {
|
||||
tdec_iteration_8(h, input);
|
||||
tdec_decision_byte(h, output);
|
||||
}
|
||||
}
|
||||
|
||||
/* Runs nof_iterations iterations and decides the output bits */
|
||||
int srslte_tdec_run_all_8bit(srslte_tdec_t * h, int8_t * input, uint8_t *output,
|
||||
uint32_t nof_iterations, uint32_t long_cb)
|
||||
{
|
||||
if (srslte_tdec_new_cb(h, long_cb)) {
|
||||
return SRSLTE_ERROR;
|
||||
}
|
||||
|
||||
do {
|
||||
tdec_iteration_8(h, input);
|
||||
} while (h->n_iter < nof_iterations);
|
||||
|
||||
tdec_decision_byte(h, output);
|
||||
|
||||
return SRSLTE_SUCCESS;
|
||||
}
|
||||
|
||||
int srslte_tdec_get_nof_iterations(srslte_tdec_t * h)
|
||||
{
|
||||
return h->n_iter;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1,475 +0,0 @@
|
|||
/**
|
||||
*
|
||||
* \section COPYRIGHT
|
||||
*
|
||||
* Copyright 2013-2015 Software Radio Systems Limited
|
||||
*
|
||||
* \section LICENSE
|
||||
*
|
||||
* This file is part of the srsLTE library.
|
||||
*
|
||||
* srsLTE is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* srsLTE is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* A copy of the GNU Affero General Public License can be found in
|
||||
* the LICENSE file in the top-level directory of this distribution
|
||||
* and at http://www.gnu.org/licenses/.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "srslte/phy/fec/turbodecoder_simd.h"
|
||||
#include "srslte/phy/utils/vector.h"
|
||||
|
||||
#include <inttypes.h>
|
||||
|
||||
#define NUMSTATES 8
|
||||
#define NINPUTS 2
|
||||
#define TAIL 3
|
||||
#define TOTALTAIL 12
|
||||
|
||||
#define INF 10000
|
||||
#define ZERO 0
|
||||
|
||||
|
||||
#ifdef LV_HAVE_AVX2
|
||||
|
||||
#include <smmintrin.h>
|
||||
#include <immintrin.h>
|
||||
|
||||
|
||||
// Number of CB processed in parllel in AVX
|
||||
#define NCB 2
|
||||
|
||||
/*
|
||||
static void print_256i(__m256i x) {
|
||||
int16_t *s = (int16_t*) &x;
|
||||
printf("[%d", s[0]);
|
||||
for (int i=1;i<16;i++) {
|
||||
printf(",%d", s[i]);
|
||||
}
|
||||
printf("]\n");
|
||||
}
|
||||
*/
|
||||
|
||||
/* Computes the horizontal MAX from 8 16-bit integers using the minpos_epu16 SSE4.1 instruction */
|
||||
static inline int16_t hMax0(__m256i masked_value)
|
||||
{
|
||||
__m128i tmp1 = _mm256_extractf128_si256(masked_value, 0);
|
||||
__m128i tmp3 = _mm_minpos_epu16(tmp1);
|
||||
return (int16_t)(_mm_cvtsi128_si32(tmp3));
|
||||
}
|
||||
|
||||
static inline int16_t hMax1(__m256i masked_value)
|
||||
{
|
||||
__m128i tmp1 = _mm256_extractf128_si256(masked_value, 1);
|
||||
__m128i tmp3 = _mm_minpos_epu16(tmp1);
|
||||
return (int16_t)(_mm_cvtsi128_si32(tmp3));
|
||||
}
|
||||
|
||||
/* Computes beta values */
|
||||
void map_avx_beta(map_gen_t * s, int16_t * output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb)
|
||||
{
|
||||
int k;
|
||||
uint32_t end = long_cb + 3;
|
||||
const __m256i *alphaPtr = (const __m256i*) s->alpha;
|
||||
|
||||
__m256i beta_k = _mm256_set_epi16(-INF, -INF, -INF, -INF, -INF, -INF, -INF, 0, -INF, -INF, -INF, -INF, -INF, -INF, -INF, 0);
|
||||
__m256i g, bp, bn, alpha_k;
|
||||
|
||||
/* Define the shuffle constant for the positive beta */
|
||||
__m256i shuf_bp = _mm256_set_epi8(
|
||||
// 1st CB
|
||||
15+16, 14+16, // 7
|
||||
7+16, 6+16, // 3
|
||||
5+16, 4+16, // 2
|
||||
13+16, 12+16, // 6
|
||||
11+16, 10+16, // 5
|
||||
3+16, 2+16, // 1
|
||||
1+16, 0+16, // 0
|
||||
9+16, 8+16, // 4
|
||||
|
||||
// 2nd CB
|
||||
15, 14, // 7
|
||||
7, 6, // 3
|
||||
5, 4, // 2
|
||||
13, 12, // 6
|
||||
11, 10, // 5
|
||||
3, 2, // 1
|
||||
1, 0, // 0
|
||||
9, 8 // 4
|
||||
);
|
||||
|
||||
/* Define the shuffle constant for the negative beta */
|
||||
__m256i shuf_bn = _mm256_set_epi8(
|
||||
7+16, 6+16, // 3
|
||||
15+16, 14+16, // 7
|
||||
13+16, 12+16, // 6
|
||||
5+16, 4+16, // 2
|
||||
3+16, 2+16, // 1
|
||||
11+16, 10+16, // 5
|
||||
9+16, 8+16, // 4
|
||||
1+16, 0+16, // 0
|
||||
|
||||
7, 6, // 3
|
||||
15, 14, // 7
|
||||
13, 12, // 6
|
||||
5, 4, // 2
|
||||
3, 2, // 1
|
||||
11, 10, // 5
|
||||
9, 8, // 4
|
||||
1, 0 // 0
|
||||
);
|
||||
|
||||
alphaPtr += long_cb-1;
|
||||
|
||||
/* Define shuffle for branch costs */
|
||||
__m256i shuf_g[4];
|
||||
shuf_g[3] = _mm256_set_epi8(3+16,2+16,1+16,0+16,1+16,0+16,3+16,2+16,3+16,2+16,1+16,0+16,1+16,0+16,3+16,2+16,
|
||||
3,2,1,0,1,0,3,2,3,2,1,0,1,0,3,2);
|
||||
shuf_g[2] = _mm256_set_epi8(7+16,6+16,5+16,4+16,5+16,4+16,7+16,6+16,7+16,6+16,5+16,4+16,5+16,4+16,7+16,6+16,
|
||||
7,6,5,4,5,4,7,6,7,6,5,4,5,4,7,6);
|
||||
shuf_g[1] = _mm256_set_epi8(11+16,10+16,9+16,8+16,9+16,8+16,11+16,10+16,11+16,10+16,9+16,8+16,9+16,8+16,11+16,10+16,
|
||||
11,10,9,8,9,8,11,10,11,10,9,8,9,8,11,10);
|
||||
shuf_g[0] = _mm256_set_epi8(15+16,14+16,13+16,12+16,13+16,12+16,15+16,14+16,15+16,14+16,13+16,12+16,13+16,12+16,15+16,14+16,
|
||||
15,14,13,12,13,12,15,14,15,14,13,12,13,12,15,14);
|
||||
|
||||
/* Define shuffle for beta normalization */
|
||||
__m256i shuf_norm = _mm256_set_epi8(17,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0);
|
||||
|
||||
__m256i gv;
|
||||
int16_t *b = &s->branch[2*NCB*long_cb-16];
|
||||
__m256i *gPtr = (__m256i*) b;
|
||||
|
||||
/* This defines a beta computation step:
|
||||
* Adds and substracts the branch metrics to the previous beta step,
|
||||
* shuffles the states according to the trellis path and selects maximum state
|
||||
*/
|
||||
#define BETA_STEP(g) bp = _mm256_add_epi16(beta_k, g);\
|
||||
bn = _mm256_sub_epi16(beta_k, g);\
|
||||
bp = _mm256_shuffle_epi8(bp, shuf_bp);\
|
||||
bn = _mm256_shuffle_epi8(bn, shuf_bn);\
|
||||
beta_k = _mm256_max_epi16(bp, bn);
|
||||
|
||||
/* Loads the alpha metrics from memory and adds them to the temporal bn and bp
|
||||
* metrics. Then computes horizontal maximum of both metrics and computes difference
|
||||
*/
|
||||
#define BETA_STEP_CNT(c,d) g = _mm256_shuffle_epi8(gv, shuf_g[c]);\
|
||||
BETA_STEP(g)\
|
||||
alpha_k = _mm256_load_si256(alphaPtr);\
|
||||
alphaPtr--;\
|
||||
bp = _mm256_add_epi16(bp, alpha_k);\
|
||||
bn = _mm256_add_epi16(bn, alpha_k);\
|
||||
bn = _mm256_sub_epi16(_mm256_set1_epi16(0x7FFF), bn);\
|
||||
bp = _mm256_sub_epi16(_mm256_set1_epi16(0x7FFF), bp);\
|
||||
output[0][k-d] = hMax0(bn) - hMax0(bp);\
|
||||
output[1][k-d] = hMax1(bn) - hMax1(bp);
|
||||
|
||||
/* The tail does not require to load alpha or produce outputs. Only update
|
||||
* beta metrics accordingly */
|
||||
for (k=end-1; k>=long_cb; k--) {
|
||||
int16_t g0_1 = s->branch[2*NCB*k];
|
||||
int16_t g1_1 = s->branch[2*NCB*k+1];
|
||||
int16_t g0_2 = s->branch[2*NCB*k+6];
|
||||
int16_t g1_2 = s->branch[2*NCB*k+6+1];
|
||||
g = _mm256_set_epi16(g1_2, g0_2, g0_2, g1_2, g1_2, g0_2, g0_2, g1_2, g1_1, g0_1, g0_1, g1_1, g1_1, g0_1, g0_1, g1_1);
|
||||
BETA_STEP(g);
|
||||
}
|
||||
|
||||
/* We inline 2 trelis steps for each normalization */
|
||||
__m256i norm;
|
||||
for (; k >= 0; k-=8) {
|
||||
gv = _mm256_load_si256(gPtr);
|
||||
gPtr--;
|
||||
BETA_STEP_CNT(0,0);
|
||||
BETA_STEP_CNT(1,1);
|
||||
BETA_STEP_CNT(2,2);
|
||||
BETA_STEP_CNT(3,3);
|
||||
norm = _mm256_shuffle_epi8(beta_k, shuf_norm);
|
||||
beta_k = _mm256_sub_epi16(beta_k, norm);
|
||||
gv = _mm256_load_si256(gPtr);
|
||||
gPtr--;
|
||||
BETA_STEP_CNT(0,4);
|
||||
BETA_STEP_CNT(1,5);
|
||||
BETA_STEP_CNT(2,6);
|
||||
BETA_STEP_CNT(3,7);
|
||||
norm = _mm256_shuffle_epi8(beta_k, shuf_norm);
|
||||
beta_k = _mm256_sub_epi16(beta_k, norm);
|
||||
}
|
||||
}
|
||||
|
||||
/* Computes alpha metrics */
|
||||
void map_avx_alpha(map_gen_t * s, uint32_t long_cb)
|
||||
{
|
||||
uint32_t k;
|
||||
int16_t *alpha1 = s->alpha;
|
||||
int16_t *alpha2 = &s->alpha[8];
|
||||
uint32_t i;
|
||||
|
||||
alpha1[0] = 0;
|
||||
alpha2[0] = 0;
|
||||
for (i = 1; i < 8; i++) {
|
||||
alpha1[i] = -INF;
|
||||
alpha2[i] = -INF;
|
||||
}
|
||||
|
||||
/* Define the shuffle constant for the positive alpha */
|
||||
__m256i shuf_ap = _mm256_set_epi8(
|
||||
|
||||
// 1st CB
|
||||
31, 30, // 7
|
||||
25, 24, // 4
|
||||
23, 22, // 3
|
||||
17, 16, // 0
|
||||
29, 28, // 6
|
||||
27, 26, // 5
|
||||
21, 20, // 2
|
||||
19, 18, // 1
|
||||
|
||||
// 2nd CB
|
||||
15, 14, // 7
|
||||
9, 8, // 4
|
||||
7, 6, // 3
|
||||
1, 0, // 0
|
||||
13, 12, // 6
|
||||
11, 10, // 5
|
||||
5, 4, // 2
|
||||
3, 2 // 1
|
||||
);
|
||||
|
||||
/* Define the shuffle constant for the negative alpha */
|
||||
__m256i shuf_an = _mm256_set_epi8(
|
||||
|
||||
// 1nd CB
|
||||
29, 28, // 6
|
||||
27, 26, // 5
|
||||
21, 20, // 2
|
||||
19, 18, // 1
|
||||
31, 30, // 7
|
||||
25, 24, // 4
|
||||
23, 22, // 3
|
||||
17, 16, // 0
|
||||
|
||||
// 2nd CB
|
||||
13, 12, // 6
|
||||
11, 10, // 5
|
||||
5, 4, // 2
|
||||
3, 2, // 1
|
||||
15, 14, // 7
|
||||
9, 8, // 4
|
||||
7, 6, // 3
|
||||
1, 0 // 0
|
||||
);
|
||||
|
||||
/* Define shuffle for branch costs */
|
||||
__m256i shuf_g[4];
|
||||
shuf_g[0] = _mm256_set_epi8(3+16,2+16,3+16,2+16,1+16,0+16,1+16,0+16,1+16,0+16,1+16,0+16,3+16,2+16,3+16,2+16,
|
||||
3,2,3,2,1,0,1,0,1,0,1,0,3,2,3,2);
|
||||
shuf_g[1] = _mm256_set_epi8(7+16,6+16,7+16,6+16,5+16,4+16,5+16,4+16,5+16,4+16,5+16,4+16,7+16,6+16,7+16,6+16,
|
||||
7,6,7,6,5,4,5,4,5,4,5,4,7,6,7,6);
|
||||
shuf_g[2] = _mm256_set_epi8(11+16,10+16,11+16,10+16,9+16,8+16,9+16,8+16,9+16,8+16,9+16,8+16,11+16,10+16,11+16,10+16,
|
||||
11,10,11,10,9,8,9,8,9,8,9,8,11,10,11,10);
|
||||
shuf_g[3] = _mm256_set_epi8(15+16,14+16,15+16,14+16,13+16,12+16,13+16,12+16,13+16,12+16,13+16,12+16,15+16,14+16,15+16,14+16,
|
||||
15,14,15,14,13,12,13,12,13,12,13,12,15,14,15,14);
|
||||
|
||||
__m256i shuf_norm = _mm256_set_epi8(17,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0);
|
||||
|
||||
__m256i* alphaPtr = (__m256i*) s->alpha;
|
||||
alphaPtr++;
|
||||
|
||||
__m256i gv;
|
||||
__m256i *gPtr = (__m256i*) s->branch;
|
||||
__m256i g, ap, an;
|
||||
|
||||
__m256i alpha_k = _mm256_set_epi16(-INF, -INF, -INF, -INF, -INF, -INF, -INF, 0, -INF, -INF, -INF, -INF, -INF, -INF, -INF, 0);
|
||||
|
||||
/* This defines a alpha computation step:
|
||||
* Adds and substracts the branch metrics to the previous alpha step,
|
||||
* shuffles the states according to the trellis path and selects maximum state
|
||||
*/
|
||||
#define ALPHA_STEP(c) g = _mm256_shuffle_epi8(gv, shuf_g[c]); \
|
||||
ap = _mm256_add_epi16(alpha_k, g);\
|
||||
an = _mm256_sub_epi16(alpha_k, g);\
|
||||
ap = _mm256_shuffle_epi8(ap, shuf_ap);\
|
||||
an = _mm256_shuffle_epi8(an, shuf_an);\
|
||||
alpha_k = _mm256_max_epi16(ap, an);\
|
||||
_mm256_store_si256(alphaPtr, alpha_k);\
|
||||
alphaPtr++;\
|
||||
|
||||
|
||||
/* In this loop, we compute 8 steps and normalize twice for each branch metrics memory load */
|
||||
__m256i norm;
|
||||
for (k = 0; k < long_cb/8; k++) {
|
||||
gv = _mm256_load_si256(gPtr);
|
||||
|
||||
gPtr++;
|
||||
ALPHA_STEP(0);
|
||||
ALPHA_STEP(1);
|
||||
ALPHA_STEP(2);
|
||||
ALPHA_STEP(3);
|
||||
norm = _mm256_shuffle_epi8(alpha_k, shuf_norm);
|
||||
alpha_k = _mm256_sub_epi16(alpha_k, norm);
|
||||
gv = _mm256_load_si256(gPtr);
|
||||
gPtr++;
|
||||
ALPHA_STEP(0);
|
||||
ALPHA_STEP(1);
|
||||
ALPHA_STEP(2);
|
||||
ALPHA_STEP(3);
|
||||
norm = _mm256_shuffle_epi8(alpha_k, shuf_norm);
|
||||
alpha_k = _mm256_sub_epi16(alpha_k, norm);
|
||||
}
|
||||
}
|
||||
|
||||
void map_sse_gamma_single(int16_t *output, int16_t *input, int16_t *app, int16_t *parity)
|
||||
{
|
||||
__m128i res00, res10, res01, res11, res0, res1;
|
||||
__m128i in, ap, pa, g1, g0;
|
||||
|
||||
__m128i *inPtr = (__m128i*) input;
|
||||
__m128i *appPtr = (__m128i*) app;
|
||||
__m128i *paPtr = (__m128i*) parity;
|
||||
__m128i *resPtr = (__m128i*) output;
|
||||
|
||||
__m128i res00_mask = _mm_set_epi8(0xff,0xff,7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0);
|
||||
__m128i res10_mask = _mm_set_epi8(0xff,0xff,15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8);
|
||||
__m128i res01_mask = _mm_set_epi8(7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0,0xff,0xff);
|
||||
__m128i res11_mask = _mm_set_epi8(15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8,0xff,0xff);
|
||||
|
||||
in = _mm_load_si128(inPtr);
|
||||
inPtr++;
|
||||
pa = _mm_load_si128(paPtr);
|
||||
paPtr++;
|
||||
|
||||
if (appPtr) {
|
||||
ap = _mm_load_si128(appPtr);
|
||||
appPtr++;
|
||||
in = _mm_add_epi16(ap, in);
|
||||
}
|
||||
|
||||
g1 = _mm_add_epi16(in, pa);
|
||||
g0 = _mm_sub_epi16(in, pa);
|
||||
|
||||
g1 = _mm_srai_epi16(g1, 1);
|
||||
g0 = _mm_srai_epi16(g0, 1);
|
||||
|
||||
res00 = _mm_shuffle_epi8(g0, res00_mask);
|
||||
res10 = _mm_shuffle_epi8(g0, res10_mask);
|
||||
res01 = _mm_shuffle_epi8(g1, res01_mask);
|
||||
res11 = _mm_shuffle_epi8(g1, res11_mask);
|
||||
|
||||
res0 = _mm_or_si128(res00, res01);
|
||||
res1 = _mm_or_si128(res10, res11);
|
||||
|
||||
_mm_store_si128(resPtr, res0);
|
||||
resPtr++;
|
||||
_mm_store_si128(resPtr, res1);
|
||||
resPtr++;
|
||||
}
|
||||
|
||||
|
||||
/* Compute branch metrics (gamma) */
|
||||
void map_avx_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t cbidx, uint32_t long_cb)
|
||||
{
|
||||
__m128i res10, res20, res11, res21, res1, res2;
|
||||
__m256i in, ap, pa, g1, g0;
|
||||
|
||||
__m256i *inPtr = (__m256i*) input;
|
||||
__m256i *appPtr = (__m256i*) app;
|
||||
__m256i *paPtr = (__m256i*) parity;
|
||||
__m128i *resPtr = (__m128i*) h->branch;
|
||||
|
||||
if (cbidx) {
|
||||
resPtr++;
|
||||
}
|
||||
|
||||
__m128i res10_mask = _mm_set_epi8(0xff,0xff,7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0);
|
||||
__m128i res11_mask = _mm_set_epi8(7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0,0xff,0xff);
|
||||
|
||||
__m128i res20_mask = _mm_set_epi8(0xff,0xff,15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8);
|
||||
__m128i res21_mask = _mm_set_epi8(15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8,0xff,0xff);
|
||||
|
||||
for (int i=0;i<long_cb/16;i++) {
|
||||
in = _mm256_load_si256(inPtr);
|
||||
inPtr++;
|
||||
pa = _mm256_load_si256(paPtr);
|
||||
paPtr++;
|
||||
|
||||
if (appPtr) {
|
||||
ap = _mm256_load_si256(appPtr);
|
||||
appPtr++;
|
||||
in = _mm256_add_epi16(ap, in);
|
||||
}
|
||||
|
||||
g0 = _mm256_sub_epi16(in, pa);
|
||||
g1 = _mm256_add_epi16(in, pa);
|
||||
|
||||
g0 = _mm256_srai_epi16(g0, 1);
|
||||
g1 = _mm256_srai_epi16(g1, 1);
|
||||
|
||||
__m128i g0_t = _mm256_extractf128_si256(g0, 0);
|
||||
__m128i g1_t = _mm256_extractf128_si256(g1, 0);
|
||||
|
||||
res10 = _mm_shuffle_epi8(g0_t, res10_mask);
|
||||
res11 = _mm_shuffle_epi8(g1_t, res11_mask);
|
||||
|
||||
res20 = _mm_shuffle_epi8(g0_t, res20_mask);
|
||||
res21 = _mm_shuffle_epi8(g1_t, res21_mask);
|
||||
|
||||
res1 = _mm_or_si128(res10, res11);
|
||||
res2 = _mm_or_si128(res20, res21);
|
||||
|
||||
_mm_store_si128(resPtr, res1);
|
||||
resPtr++;
|
||||
resPtr++;
|
||||
_mm_store_si128(resPtr, res2);
|
||||
resPtr++;
|
||||
resPtr++;
|
||||
|
||||
g0_t = _mm256_extractf128_si256(g0, 1);
|
||||
g1_t = _mm256_extractf128_si256(g1, 1);
|
||||
|
||||
res10 = _mm_shuffle_epi8(g0_t, res10_mask);
|
||||
res11 = _mm_shuffle_epi8(g1_t, res11_mask);
|
||||
|
||||
res20 = _mm_shuffle_epi8(g0_t, res20_mask);
|
||||
res21 = _mm_shuffle_epi8(g1_t, res21_mask);
|
||||
|
||||
res1 = _mm_or_si128(res10, res11);
|
||||
res2 = _mm_or_si128(res20, res21);
|
||||
|
||||
_mm_store_si128(resPtr, res1);
|
||||
resPtr++;
|
||||
resPtr++;
|
||||
_mm_store_si128(resPtr, res2);
|
||||
resPtr++;
|
||||
resPtr++;
|
||||
|
||||
}
|
||||
|
||||
if (long_cb%16) {
|
||||
map_sse_gamma_single((int16_t*) resPtr, (int16_t*) inPtr, (int16_t*) appPtr, (int16_t*) paPtr);
|
||||
}
|
||||
|
||||
for (int i=long_cb;i<long_cb+3;i++) {
|
||||
h->branch[2*i*NCB+cbidx*6] = (input[i] - parity[i])/2;
|
||||
h->branch[2*i*NCB+cbidx*6+1] = (input[i] + parity[i])/2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -1,400 +1,273 @@
|
|||
/**
|
||||
*
|
||||
* \section COPYRIGHT
|
||||
*
|
||||
* Copyright 2013-2015 Software Radio Systems Limited
|
||||
*
|
||||
* \section LICENSE
|
||||
*
|
||||
* This file is part of the srsLTE library.
|
||||
*
|
||||
* srsLTE is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* srsLTE is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* A copy of the GNU Affero General Public License can be found in
|
||||
* the LICENSE file in the top-level directory of this distribution
|
||||
* and at http://www.gnu.org/licenses/.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "srslte/phy/fec/turbodecoder_gen.h"
|
||||
#include "srslte/phy/utils/vector.h"
|
||||
|
||||
#define NUMSTATES 8
|
||||
#define NINPUTS 2
|
||||
#define TAIL 3
|
||||
#define TOTALTAIL 12
|
||||
|
||||
#define INF 9e4
|
||||
#define ZERO 9e-4
|
||||
|
||||
/************************************************
|
||||
*
|
||||
* MAP_GEN is the MAX-LOG-MAP generic implementation of the
|
||||
* Decoder
|
||||
*
|
||||
************************************************/
|
||||
static void map_gen_beta(srslte_map_gen_vl_t * s, float * input, float * parity,
|
||||
uint32_t long_cb)
|
||||
{
|
||||
float m_b[8], new[8], old[8];
|
||||
float x, y, xy;
|
||||
int k;
|
||||
uint32_t end = long_cb + SRSLTE_TCOD_RATE;
|
||||
float *beta = s->beta;
|
||||
uint32_t i;
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
old[i] = beta[8 * (end) + i];
|
||||
}
|
||||
|
||||
for (k = end - 1; k >= 0; k--) {
|
||||
x = input[k];
|
||||
y = parity[k];
|
||||
|
||||
xy = x + y;
|
||||
|
||||
m_b[0] = old[4] + xy;
|
||||
m_b[1] = old[4];
|
||||
m_b[2] = old[5] + y;
|
||||
m_b[3] = old[5] + x;
|
||||
m_b[4] = old[6] + x;
|
||||
m_b[5] = old[6] + y;
|
||||
m_b[6] = old[7];
|
||||
m_b[7] = old[7] + xy;
|
||||
|
||||
new[0] = old[0];
|
||||
new[1] = old[0] + xy;
|
||||
new[2] = old[1] + x;
|
||||
new[3] = old[1] + y;
|
||||
new[4] = old[2] + y;
|
||||
new[5] = old[2] + x;
|
||||
new[6] = old[3] + xy;
|
||||
new[7] = old[3];
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
if (m_b[i] > new[i])
|
||||
new[i] = m_b[i];
|
||||
old[i] = new[i];
|
||||
beta[8 * k + i] = old[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void map_gen_alpha(srslte_map_gen_vl_t * s, float * input, float * parity, float * output,
|
||||
uint32_t long_cb)
|
||||
{
|
||||
float m_b[8], new[8], old[8], max1[8], max0[8];
|
||||
float m1, m0;
|
||||
float x, y, xy;
|
||||
float out;
|
||||
uint32_t k;
|
||||
uint32_t end = long_cb;
|
||||
float *beta = s->beta;
|
||||
uint32_t i;
|
||||
|
||||
old[0] = 0;
|
||||
for (i = 1; i < 8; i++) {
|
||||
old[i] = -INF;
|
||||
}
|
||||
|
||||
for (k = 1; k < end + 1; k++) {
|
||||
x = input[k - 1];
|
||||
y = parity[k - 1];
|
||||
|
||||
xy = x + y;
|
||||
|
||||
m_b[0] = old[0];
|
||||
m_b[1] = old[3] + y;
|
||||
m_b[2] = old[4] + y;
|
||||
m_b[3] = old[7];
|
||||
m_b[4] = old[1];
|
||||
m_b[5] = old[2] + y;
|
||||
m_b[6] = old[5] + y;
|
||||
m_b[7] = old[6];
|
||||
|
||||
new[0] = old[1] + xy;
|
||||
new[1] = old[2] + x;
|
||||
new[2] = old[5] + x;
|
||||
new[3] = old[6] + xy;
|
||||
new[4] = old[0] + xy;
|
||||
new[5] = old[3] + x;
|
||||
new[6] = old[4] + x;
|
||||
new[7] = old[7] + xy;
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
max0[i] = m_b[i] + beta[8 * k + i];
|
||||
max1[i] = new[i] + beta[8 * k + i];
|
||||
}
|
||||
|
||||
m1 = max1[0];
|
||||
m0 = max0[0];
|
||||
|
||||
for (i = 1; i < 8; i++) {
|
||||
if (max1[i] > m1)
|
||||
m1 = max1[i];
|
||||
if (max0[i] > m0)
|
||||
m0 = max0[i];
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
if (m_b[i] > new[i])
|
||||
new[i] = m_b[i];
|
||||
old[i] = new[i];
|
||||
}
|
||||
|
||||
out = m1 - m0;
|
||||
output[k - 1] = out;
|
||||
}
|
||||
}
|
||||
|
||||
static int map_gen_init(srslte_map_gen_vl_t * h, int max_long_cb)
|
||||
{
|
||||
bzero(h, sizeof(srslte_map_gen_vl_t));
|
||||
h->beta = srslte_vec_malloc(sizeof(float) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES);
|
||||
if (!h->beta) {
|
||||
perror("srslte_vec_malloc");
|
||||
return -1;
|
||||
}
|
||||
h->max_long_cb = max_long_cb;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void map_gen_free(srslte_map_gen_vl_t * h)
|
||||
{
|
||||
if (h->beta) {
|
||||
free(h->beta);
|
||||
}
|
||||
bzero(h, sizeof(srslte_map_gen_vl_t));
|
||||
}
|
||||
|
||||
static void map_gen_dec(srslte_map_gen_vl_t * h, float * input, float * parity, float * output,
|
||||
uint32_t long_cb)
|
||||
{
|
||||
uint32_t k;
|
||||
|
||||
h->beta[(long_cb + TAIL) * NUMSTATES] = 0;
|
||||
for (k = 1; k < NUMSTATES; k++)
|
||||
h->beta[(long_cb + TAIL) * NUMSTATES + k] = -INF;
|
||||
|
||||
map_gen_beta(h, input, parity, long_cb);
|
||||
map_gen_alpha(h, input, parity, output, long_cb);
|
||||
}
|
||||
|
||||
/************************************************
|
||||
*
|
||||
* TURBO DECODER INTERFACE
|
||||
*
|
||||
************************************************/
|
||||
int srslte_tdec_gen_init(srslte_tdec_gen_t * h, uint32_t max_long_cb)
|
||||
{
|
||||
int ret = -1;
|
||||
bzero(h, sizeof(srslte_tdec_gen_t));
|
||||
uint32_t len = max_long_cb + SRSLTE_TCOD_TOTALTAIL;
|
||||
|
||||
h->max_long_cb = max_long_cb;
|
||||
|
||||
h->llr1 = srslte_vec_malloc(sizeof(float) * len);
|
||||
if (!h->llr1) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->llr2 = srslte_vec_malloc(sizeof(float) * len);
|
||||
if (!h->llr2) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->w = srslte_vec_malloc(sizeof(float) * len);
|
||||
if (!h->w) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->syst = srslte_vec_malloc(sizeof(float) * len);
|
||||
if (!h->syst) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->parity = srslte_vec_malloc(sizeof(float) * len);
|
||||
if (!h->parity) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
|
||||
if (map_gen_init(&h->dec, h->max_long_cb)) {
|
||||
goto clean_and_exit;
|
||||
}
|
||||
|
||||
for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
|
||||
if (srslte_tc_interl_init(&h->interleaver[i], srslte_cbsegm_cbsize(i)) < 0) {
|
||||
goto clean_and_exit;
|
||||
}
|
||||
srslte_tc_interl_LTE_gen(&h->interleaver[i], srslte_cbsegm_cbsize(i));
|
||||
}
|
||||
h->current_cbidx = -1;
|
||||
ret = 0;
|
||||
clean_and_exit:if (ret == -1) {
|
||||
srslte_tdec_gen_free(h);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void srslte_tdec_gen_free(srslte_tdec_gen_t * h)
|
||||
{
|
||||
if (h->llr1) {
|
||||
free(h->llr1);
|
||||
}
|
||||
if (h->llr2) {
|
||||
free(h->llr2);
|
||||
}
|
||||
if (h->w) {
|
||||
free(h->w);
|
||||
}
|
||||
if (h->syst) {
|
||||
free(h->syst);
|
||||
}
|
||||
if (h->parity) {
|
||||
free(h->parity);
|
||||
}
|
||||
|
||||
map_gen_free(&h->dec);
|
||||
|
||||
for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
|
||||
srslte_tc_interl_free(&h->interleaver[i]);
|
||||
}
|
||||
|
||||
bzero(h, sizeof(srslte_tdec_gen_t));
|
||||
}
|
||||
|
||||
void srslte_tdec_gen_iteration(srslte_tdec_gen_t * h, float * input, uint32_t long_cb)
|
||||
{
|
||||
uint32_t i;
|
||||
|
||||
if (h->current_cbidx >= 0) {
|
||||
|
||||
uint16_t *inter = h->interleaver[h->current_cbidx].forward;
|
||||
uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
|
||||
|
||||
// Prepare systematic and parity bits for MAP DEC #1
|
||||
for (i = 0; i < long_cb; i++) {
|
||||
h->syst[i] = input[SRSLTE_TCOD_RATE * i] + h->w[i];
|
||||
h->parity[i] = input[SRSLTE_TCOD_RATE * i + 1];
|
||||
}
|
||||
for (i = long_cb; i < long_cb + SRSLTE_TCOD_RATE; i++) {
|
||||
h->syst[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * (i - long_cb)];
|
||||
h->parity[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * (i - long_cb) + 1];
|
||||
}
|
||||
|
||||
// Run MAP DEC #1
|
||||
map_gen_dec(&h->dec, h->syst, h->parity, h->llr1, long_cb);
|
||||
|
||||
// Prepare systematic and parity bits for MAP DEC #1
|
||||
for (i = 0; i < long_cb; i++) {
|
||||
h->syst[i] = h->llr1[inter[i]]
|
||||
- h->w[inter[i]];
|
||||
h->parity[i] = input[SRSLTE_TCOD_RATE * i + 2];
|
||||
}
|
||||
for (i = long_cb; i < long_cb + SRSLTE_TCOD_RATE; i++) {
|
||||
h->syst[i] =
|
||||
input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * SRSLTE_TCOD_RATE + NINPUTS * (i - long_cb)];
|
||||
h->parity[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * SRSLTE_TCOD_RATE
|
||||
+ NINPUTS * (i - long_cb) + 1];
|
||||
}
|
||||
|
||||
// Run MAP DEC #2
|
||||
map_gen_dec(&h->dec, h->syst, h->parity, h->llr2, long_cb);
|
||||
|
||||
//printf("llr2=");
|
||||
//srslte_vec_fprint_f(stdout, h->llr2, long_cb);
|
||||
|
||||
|
||||
// Update a-priori LLR from the last iteration
|
||||
for (i = 0; i < long_cb; i++) {
|
||||
h->w[i] += h->llr2[deinter[i]] - h->llr1[i];
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "Error CB index not set (call srslte_tdec_gen_reset() first\n");
|
||||
}
|
||||
|
||||
// Increase number of iterations
|
||||
h->n_iter++;
|
||||
}
|
||||
|
||||
int srslte_tdec_gen_reset(srslte_tdec_gen_t * h, uint32_t long_cb)
|
||||
{
|
||||
if (long_cb > h->max_long_cb) {
|
||||
fprintf(stderr, "TDEC was initialized for max_long_cb=%d\n",
|
||||
h->max_long_cb);
|
||||
return -1;
|
||||
}
|
||||
memset(h->w, 0, sizeof(float) * long_cb);
|
||||
h->current_cbidx = srslte_cbsegm_cbindex(long_cb);
|
||||
h->current_cb_len = long_cb;
|
||||
if (h->current_cbidx < 0) {
|
||||
fprintf(stderr, "Invalid CB length %d\n", long_cb);
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void srslte_tdec_gen_decision(srslte_tdec_gen_t * h, uint8_t *output, uint32_t long_cb)
|
||||
{
|
||||
uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
|
||||
uint32_t i;
|
||||
for (i = 0; i < long_cb; i++) {
|
||||
output[i] = (h->llr2[deinter[i]] > 0) ? 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
void srslte_tdec_gen_decision_byte(srslte_tdec_gen_t * h, uint8_t *output, uint32_t long_cb)
|
||||
{
|
||||
uint32_t i;
|
||||
uint8_t mask[8] = {0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1};
|
||||
uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
|
||||
|
||||
// long_cb is always byte aligned
|
||||
for (i = 0; i < long_cb/8; i++) {
|
||||
uint8_t out0 = h->llr2[deinter[8*i+0]]>0?mask[0]:0;
|
||||
uint8_t out1 = h->llr2[deinter[8*i+1]]>0?mask[1]:0;
|
||||
uint8_t out2 = h->llr2[deinter[8*i+2]]>0?mask[2]:0;
|
||||
uint8_t out3 = h->llr2[deinter[8*i+3]]>0?mask[3]:0;
|
||||
uint8_t out4 = h->llr2[deinter[8*i+4]]>0?mask[4]:0;
|
||||
uint8_t out5 = h->llr2[deinter[8*i+5]]>0?mask[5]:0;
|
||||
uint8_t out6 = h->llr2[deinter[8*i+6]]>0?mask[6]:0;
|
||||
uint8_t out7 = h->llr2[deinter[8*i+7]]>0?mask[7]:0;
|
||||
|
||||
output[i] = out0 | out1 | out2 | out3 | out4 | out5 | out6 | out7;
|
||||
}
|
||||
}
|
||||
|
||||
int srslte_tdec_gen_run_all(srslte_tdec_gen_t * h, float * input, uint8_t *output,
|
||||
uint32_t nof_iterations, uint32_t long_cb)
|
||||
{
|
||||
uint32_t iter = 0;
|
||||
|
||||
if (srslte_tdec_gen_reset(h, long_cb)) {
|
||||
return SRSLTE_ERROR;
|
||||
}
|
||||
|
||||
do {
|
||||
srslte_tdec_gen_iteration(h, input, long_cb);
|
||||
iter++;
|
||||
} while (iter < nof_iterations);
|
||||
|
||||
srslte_tdec_gen_decision_byte(h, output, long_cb);
|
||||
|
||||
return SRSLTE_SUCCESS;
|
||||
}
|
||||
/**
|
||||
*
|
||||
* \section COPYRIGHT
|
||||
*
|
||||
* Copyright 2013-2015 Software Radio Systems Limited
|
||||
*
|
||||
* \section LICENSE
|
||||
*
|
||||
* This file is part of the srsLTE library.
|
||||
*
|
||||
* srsLTE is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* srsLTE is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* A copy of the GNU Affero General Public License can be found in
|
||||
* the LICENSE file in the top-level directory of this distribution
|
||||
* and at http://www.gnu.org/licenses/.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "srslte/phy/fec/turbodecoder_gen.h"
|
||||
#include "srslte/phy/utils/vector.h"
|
||||
|
||||
#define NUMSTATES 8
|
||||
#define NINPUTS 2
|
||||
#define TAIL 3
|
||||
#define TOTALTAIL 12
|
||||
|
||||
#define INF 10000
|
||||
|
||||
#define debug_enabled 0
|
||||
|
||||
#if debug_enabled
|
||||
#define debug_state printf("k=%5d, in=%5d, pa=%3d, out=%5d, alpha=", k, x, parity[k-1], out); srslte_vec_fprint_s(stdout, alpha, 8); \
|
||||
printf(", beta="); srslte_vec_fprint_s(stdout, &beta[8*(k)], 8); printf("\n");
|
||||
#else
|
||||
#define debug_state
|
||||
#endif
|
||||
|
||||
/************************************************
|
||||
*
|
||||
* MAP_GEN is the MAX-LOG-MAP generic implementation of the
|
||||
* Decoder
|
||||
*
|
||||
************************************************/
|
||||
static void map_gen_beta(tdec_gen_t *s, int16_t *input, int16_t *app, int16_t *parity, uint32_t long_cb)
|
||||
{
|
||||
int16_t m_b[8], new[8], old[8];
|
||||
int16_t x, y, xy;
|
||||
int k;
|
||||
uint32_t end = long_cb + SRSLTE_TCOD_RATE;
|
||||
int16_t *beta = s->beta;
|
||||
uint32_t i;
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
old[i] = beta[8 * (end) + i];
|
||||
}
|
||||
|
||||
for (k = end - 1; k >= 0; k--) {
|
||||
x = input[k];
|
||||
if (app && k<long_cb) {
|
||||
x += app[k];
|
||||
}
|
||||
y = parity[k];
|
||||
|
||||
xy = x + y;
|
||||
|
||||
m_b[0] = old[4] + xy;
|
||||
m_b[1] = old[4];
|
||||
m_b[2] = old[5] + y;
|
||||
m_b[3] = old[5] + x;
|
||||
m_b[4] = old[6] + x;
|
||||
m_b[5] = old[6] + y;
|
||||
m_b[6] = old[7];
|
||||
m_b[7] = old[7] + xy;
|
||||
|
||||
new[0] = old[0];
|
||||
new[1] = old[0] + xy;
|
||||
new[2] = old[1] + x;
|
||||
new[3] = old[1] + y;
|
||||
new[4] = old[2] + y;
|
||||
new[5] = old[2] + x;
|
||||
new[6] = old[3] + xy;
|
||||
new[7] = old[3];
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
if (m_b[i] > new[i])
|
||||
new[i] = m_b[i];
|
||||
old[i] = new[i];
|
||||
beta[8 * k + i] = old[i];
|
||||
}
|
||||
|
||||
if ((k%4)==0 && k < long_cb) {
|
||||
for (i = 1; i < 8; i++) {
|
||||
old[i] -= old[0];
|
||||
}
|
||||
old[0] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void map_gen_alpha(tdec_gen_t *s, int16_t *input, int16_t *app, int16_t *parity, int16_t *output, uint32_t long_cb)
|
||||
{
|
||||
int16_t m_b[8], new[8], old[8], max1[8], max0[8];
|
||||
int16_t m1, m0;
|
||||
int16_t x, y, xy;
|
||||
int16_t out;
|
||||
uint32_t k;
|
||||
uint32_t end = long_cb;
|
||||
int16_t *beta = s->beta;
|
||||
uint32_t i;
|
||||
|
||||
old[0] = 0;
|
||||
for (i = 1; i < 8; i++) {
|
||||
old[i] = -INF;
|
||||
}
|
||||
|
||||
#if debug_enabled
|
||||
int16_t alpha[8];
|
||||
#endif
|
||||
|
||||
for (k = 1; k < end + 1; k++) {
|
||||
x = input[k - 1];
|
||||
if (app) {
|
||||
x += app[k - 1];
|
||||
}
|
||||
y = parity[k - 1];
|
||||
|
||||
xy = x + y;
|
||||
|
||||
#if debug_enabled
|
||||
memcpy(alpha, old, sizeof(int16_t)*8);
|
||||
#endif
|
||||
|
||||
m_b[0] = old[0];
|
||||
m_b[1] = old[3] + y;
|
||||
m_b[2] = old[4] + y;
|
||||
m_b[3] = old[7];
|
||||
m_b[4] = old[1];
|
||||
m_b[5] = old[2] + y;
|
||||
m_b[6] = old[5] + y;
|
||||
m_b[7] = old[6];
|
||||
|
||||
new[0] = old[1] + xy;
|
||||
new[1] = old[2] + x;
|
||||
new[2] = old[5] + x;
|
||||
new[3] = old[6] + xy;
|
||||
new[4] = old[0] + xy;
|
||||
new[5] = old[3] + x;
|
||||
new[6] = old[4] + x;
|
||||
new[7] = old[7] + xy;
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
max0[i] = m_b[i] + beta[8 * k + i];
|
||||
max1[i] = new[i] + beta[8 * k + i];
|
||||
}
|
||||
|
||||
m1 = max1[0];
|
||||
m0 = max0[0];
|
||||
|
||||
for (i = 1; i < 8; i++) {
|
||||
if (max1[i] > m1)
|
||||
m1 = max1[i];
|
||||
if (max0[i] > m0)
|
||||
m0 = max0[i];
|
||||
}
|
||||
for (i = 0; i < 8; i++) {
|
||||
if (m_b[i] > new[i])
|
||||
new[i] = m_b[i];
|
||||
old[i] = new[i]; }
|
||||
|
||||
if ((k%4)==0) {
|
||||
for (i = 1; i < 8; i++) {
|
||||
old[i] -= old[0];
|
||||
}
|
||||
old[0] = 0;
|
||||
}
|
||||
|
||||
out = m1 - m0;
|
||||
output[k - 1] = out;
|
||||
|
||||
debug_state;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
int tdec_gen_init(void **hh, uint32_t max_long_cb)
|
||||
{
|
||||
*hh = calloc(1, sizeof(tdec_gen_t));
|
||||
|
||||
tdec_gen_t *h = (tdec_gen_t*) *hh;
|
||||
|
||||
h->beta = srslte_vec_malloc(sizeof(int16_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES);
|
||||
if (!h->beta) {
|
||||
perror("srslte_vec_malloc");
|
||||
return -1;
|
||||
}
|
||||
h->max_long_cb = max_long_cb;
|
||||
return 1;
|
||||
}
|
||||
|
||||
void tdec_gen_free(void *hh)
|
||||
{
|
||||
tdec_gen_t *h = (tdec_gen_t*) hh;
|
||||
if (h->beta) {
|
||||
free(h->beta);
|
||||
}
|
||||
bzero(h, sizeof(tdec_gen_t));
|
||||
}
|
||||
|
||||
void tdec_gen_dec(void *hh, int16_t *input, int16_t *app, int16_t *parity, int16_t *output, uint32_t long_cb)
|
||||
{
|
||||
tdec_gen_t *h = (tdec_gen_t*) hh;
|
||||
|
||||
h->beta[(long_cb + TAIL) * NUMSTATES] = 0;
|
||||
for (uint32_t k = 1; k < NUMSTATES; k++)
|
||||
h->beta[(long_cb + TAIL) * NUMSTATES + k] = -INF;
|
||||
|
||||
map_gen_beta(h, input, app, parity, long_cb);
|
||||
map_gen_alpha(h, input, app, parity, output, long_cb);
|
||||
}
|
||||
|
||||
void tdec_gen_extract_input(int16_t *input, int16_t *syst, int16_t *app2, int16_t *parity0, int16_t *parity1, uint32_t long_cb)
|
||||
{
|
||||
// Prepare systematic and parity bits for MAP DEC #1
|
||||
for (uint32_t i = 0; i < long_cb; i++) {
|
||||
syst[i] = input[SRSLTE_TCOD_RATE * i];
|
||||
parity0[i] = input[SRSLTE_TCOD_RATE * i + 1];
|
||||
parity1[i] = input[SRSLTE_TCOD_RATE * i + 2];
|
||||
}
|
||||
for (uint32_t i = long_cb; i < long_cb + SRSLTE_TCOD_RATE; i++) {
|
||||
syst[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * (i - long_cb)];
|
||||
parity0[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * (i - long_cb) + 1];
|
||||
|
||||
app2[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * SRSLTE_TCOD_RATE + NINPUTS * (i - long_cb)];
|
||||
parity1[i] = input[SRSLTE_TCOD_RATE * long_cb + NINPUTS * SRSLTE_TCOD_RATE
|
||||
+ NINPUTS * (i - long_cb) + 1];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void tdec_gen_decision_byte(int16_t *app1, uint8_t *output, uint32_t long_cb)
|
||||
{
|
||||
uint8_t mask[8] = {0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1};
|
||||
|
||||
// long_cb is always byte aligned
|
||||
for (uint32_t i = 0; i < long_cb/8; i++) {
|
||||
uint8_t out0 = app1[8*i+0]>0?mask[0]:0;
|
||||
uint8_t out1 = app1[8*i+1]>0?mask[1]:0;
|
||||
uint8_t out2 = app1[8*i+2]>0?mask[2]:0;
|
||||
uint8_t out3 = app1[8*i+3]>0?mask[3]:0;
|
||||
uint8_t out4 = app1[8*i+4]>0?mask[4]:0;
|
||||
uint8_t out5 = app1[8*i+5]>0?mask[5]:0;
|
||||
uint8_t out6 = app1[8*i+6]>0?mask[6]:0;
|
||||
uint8_t out7 = app1[8*i+7]>0?mask[7]:0;
|
||||
|
||||
output[i] = out0 | out1 | out2 | out3 | out4 | out5 | out6 | out7;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,542 +0,0 @@
|
|||
/**
|
||||
*
|
||||
* \section COPYRIGHT
|
||||
*
|
||||
* Copyright 2013-2015 Software Radio Systems Limited
|
||||
*
|
||||
* \section LICENSE
|
||||
*
|
||||
* This file is part of the srsLTE library.
|
||||
*
|
||||
* srsLTE is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* srsLTE is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* A copy of the GNU Affero General Public License can be found in
|
||||
* the LICENSE file in the top-level directory of this distribution
|
||||
* and at http://www.gnu.org/licenses/.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "srslte/phy/fec/turbodecoder_simd.h"
|
||||
#include "srslte/phy/utils/vector.h"
|
||||
|
||||
#include <inttypes.h>
|
||||
|
||||
#define NUMSTATES 8
|
||||
#define NINPUTS 2
|
||||
#define TAIL 3
|
||||
#define TOTALTAIL 12
|
||||
|
||||
#define INF 10000
|
||||
#define ZERO 0
|
||||
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <smmintrin.h>
|
||||
|
||||
// Define SSE/AVX implementations
|
||||
void map_sse_beta(map_gen_t * s, int16_t * output, uint32_t long_cb);
|
||||
void map_sse_alpha(map_gen_t * s, uint32_t long_cb);
|
||||
void map_sse_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t long_cb);
|
||||
|
||||
#ifdef LV_HAVE_AVX2
|
||||
void map_avx_beta(map_gen_t * s, int16_t * output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb);
|
||||
void map_avx_alpha(map_gen_t * s, uint32_t long_cb);
|
||||
void map_avx_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t cbidx, uint32_t long_cb);
|
||||
#endif
|
||||
|
||||
|
||||
void map_simd_beta(map_gen_t * s, int16_t * output[SRSLTE_TDEC_MAX_NPAR], uint32_t nof_cb, uint32_t long_cb)
|
||||
{
|
||||
if (nof_cb == 1) {
|
||||
map_sse_beta(s, output[0], long_cb);
|
||||
}
|
||||
#ifdef LV_HAVE_AVX2
|
||||
else if (nof_cb == 2) {
|
||||
map_avx_beta(s, output, long_cb);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void map_simd_alpha(map_gen_t * s, uint32_t nof_cb, uint32_t long_cb)
|
||||
{
|
||||
if (nof_cb == 1) {
|
||||
map_sse_alpha(s, long_cb);
|
||||
}
|
||||
#ifdef LV_HAVE_AVX2
|
||||
else if (nof_cb == 2) {
|
||||
map_avx_alpha(s, long_cb);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
void map_simd_gamma(map_gen_t * s, int16_t *input, int16_t *app, int16_t *parity, uint32_t cbidx, uint32_t nof_cb, uint32_t long_cb)
|
||||
{
|
||||
if (nof_cb == 1) {
|
||||
map_sse_gamma(s, input, app, parity, long_cb);
|
||||
}
|
||||
#ifdef LV_HAVE_AVX2
|
||||
else if (nof_cb == 2) {
|
||||
map_avx_gamma(s, input, app, parity, cbidx, long_cb);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Inititalizes constituent decoder object */
|
||||
int map_simd_init(map_gen_t * h, uint32_t max_par_cb, uint32_t max_long_cb)
|
||||
{
|
||||
bzero(h, sizeof(map_gen_t));
|
||||
|
||||
h->max_par_cb = max_par_cb;
|
||||
h->max_long_cb = max_long_cb;
|
||||
|
||||
h->alpha = srslte_vec_malloc(sizeof(int16_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES * h->max_par_cb);
|
||||
if (!h->alpha) {
|
||||
perror("srslte_vec_malloc");
|
||||
return -1;
|
||||
}
|
||||
h->branch = srslte_vec_malloc(sizeof(int16_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES * h->max_par_cb);
|
||||
if (!h->branch) {
|
||||
perror("srslte_vec_malloc");
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void map_simd_free(map_gen_t * h)
|
||||
{
|
||||
if (h->alpha) {
|
||||
free(h->alpha);
|
||||
}
|
||||
if (h->branch) {
|
||||
free(h->branch);
|
||||
}
|
||||
bzero(h, sizeof(map_gen_t));
|
||||
}
|
||||
|
||||
/* Runs one instance of a decoder */
|
||||
void map_simd_dec(map_gen_t * h, int16_t * input[SRSLTE_TDEC_MAX_NPAR], int16_t *app[SRSLTE_TDEC_MAX_NPAR], int16_t * parity[SRSLTE_TDEC_MAX_NPAR],
|
||||
int16_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t cb_mask, uint32_t long_cb)
|
||||
{
|
||||
|
||||
uint32_t nof_cb = 1;
|
||||
int16_t *outptr[SRSLTE_TDEC_MAX_NPAR] = { NULL, NULL };
|
||||
|
||||
// Compute branch metrics
|
||||
switch(cb_mask) {
|
||||
case 1:
|
||||
nof_cb = 1;
|
||||
outptr[0] = output[0];
|
||||
map_simd_gamma(h, input[0], app?app[0]:NULL, parity[0], 0, 1, long_cb);
|
||||
break;
|
||||
case 2:
|
||||
nof_cb = 1;
|
||||
outptr[0] = output[1];
|
||||
map_simd_gamma(h, input[1], app?app[1]:NULL, parity[1], 0, 1, long_cb);
|
||||
break;
|
||||
case 3:
|
||||
nof_cb = 2;
|
||||
for (int i=0;i<2;i++) {
|
||||
outptr[i] = output[i];
|
||||
map_simd_gamma(h, input[i], app?app[i]:NULL, parity[i], i, 2, long_cb);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Forward recursion
|
||||
map_simd_alpha(h, nof_cb, long_cb);
|
||||
|
||||
// Backwards recursion + LLR computation
|
||||
map_simd_beta(h, outptr, nof_cb, long_cb);
|
||||
}
|
||||
|
||||
/* Initializes the turbo decoder object */
|
||||
int srslte_tdec_simd_init(srslte_tdec_simd_t * h, uint32_t max_par_cb, uint32_t max_long_cb)
|
||||
{
|
||||
int ret = -1;
|
||||
bzero(h, sizeof(srslte_tdec_simd_t));
|
||||
uint32_t len = max_long_cb + SRSLTE_TCOD_TOTALTAIL;
|
||||
|
||||
h->max_long_cb = max_long_cb;
|
||||
h->max_par_cb = max_par_cb;
|
||||
|
||||
for (int i=0;i<h->max_par_cb;i++) {
|
||||
h->app1[i] = srslte_vec_malloc(sizeof(int16_t) * len);
|
||||
if (!h->app1[i]) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->app2[i] = srslte_vec_malloc(sizeof(int16_t) * len);
|
||||
if (!h->app2[i]) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->ext1[i] = srslte_vec_malloc(sizeof(int16_t) * len);
|
||||
if (!h->ext1[i]) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->ext2[i] = srslte_vec_malloc(sizeof(int16_t) * len);
|
||||
if (!h->ext2[i]) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->syst[i] = srslte_vec_malloc(sizeof(int16_t) * len);
|
||||
if (!h->syst[i]) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->parity0[i] = srslte_vec_malloc(sizeof(int16_t) * len);
|
||||
if (!h->parity0[i]) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->parity1[i] = srslte_vec_malloc(sizeof(int16_t) * len);
|
||||
if (!h->parity1[i]) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (map_simd_init(&h->dec, h->max_par_cb, h->max_long_cb)) {
|
||||
goto clean_and_exit;
|
||||
}
|
||||
|
||||
for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
|
||||
if (srslte_tc_interl_init(&h->interleaver[i], srslte_cbsegm_cbsize(i)) < 0) {
|
||||
goto clean_and_exit;
|
||||
}
|
||||
srslte_tc_interl_LTE_gen(&h->interleaver[i], srslte_cbsegm_cbsize(i));
|
||||
}
|
||||
h->current_cbidx = -1;
|
||||
h->cb_mask = 0;
|
||||
ret = 0;
|
||||
clean_and_exit:if (ret == -1) {
|
||||
srslte_tdec_simd_free(h);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void srslte_tdec_simd_free(srslte_tdec_simd_t * h)
|
||||
{
|
||||
for (int i=0;i<h->max_par_cb;i++) {
|
||||
if (h->app1[i]) {
|
||||
free(h->app1[i]);
|
||||
}
|
||||
if (h->app2[i]) {
|
||||
free(h->app2[i]);
|
||||
}
|
||||
if (h->ext1[i]) {
|
||||
free(h->ext1[i]);
|
||||
}
|
||||
if (h->ext2[i]) {
|
||||
free(h->ext2[i]);
|
||||
}
|
||||
if (h->syst[i]) {
|
||||
free(h->syst[i]);
|
||||
}
|
||||
if (h->parity0[i]) {
|
||||
free(h->parity0[i]);
|
||||
}
|
||||
if (h->parity1[i]) {
|
||||
free(h->parity1[i]);
|
||||
}
|
||||
}
|
||||
|
||||
map_simd_free(&h->dec);
|
||||
|
||||
for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
|
||||
srslte_tc_interl_free(&h->interleaver[i]);
|
||||
}
|
||||
|
||||
bzero(h, sizeof(srslte_tdec_simd_t));
|
||||
}
|
||||
|
||||
/* Deinterleaves the 3 streams from the input (systematic and 2 parity bits) into
|
||||
* 3 buffers ready to be used by compute_gamma()
|
||||
*/
|
||||
void deinterleave_input_simd(srslte_tdec_simd_t *h, int16_t *input, uint32_t cbidx, uint32_t long_cb) {
|
||||
uint32_t i;
|
||||
|
||||
__m128i *inputPtr = (__m128i*) input;
|
||||
__m128i in0, in1, in2;
|
||||
__m128i s0, s1, s2, s;
|
||||
__m128i p00, p01, p02, p0;
|
||||
__m128i p10, p11, p12, p1;
|
||||
|
||||
__m128i *sysPtr = (__m128i*) h->syst[cbidx];
|
||||
__m128i *pa0Ptr = (__m128i*) h->parity0[cbidx];
|
||||
__m128i *pa1Ptr = (__m128i*) h->parity1[cbidx];
|
||||
|
||||
// pick bits 0, 3, 6 from 1st word
|
||||
__m128i s0_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,13,12,7,6,1,0);
|
||||
// pick bits 1, 4, 7 from 2st word
|
||||
__m128i s1_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,15,14,9,8,3,2,0xff,0xff,0xff,0xff,0xff,0xff);
|
||||
// pick bits 2, 5 from 3rd word
|
||||
__m128i s2_mask = _mm_set_epi8(11,10,5,4,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
|
||||
|
||||
// pick bits 1, 4, 7 from 1st word
|
||||
__m128i p00_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,15,14,9,8,3,2);
|
||||
// pick bits 2, 5, from 2st word
|
||||
__m128i p01_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,11,10,5,4,0xff,0xff,0xff,0xff,0xff,0xff);
|
||||
// pick bits 0, 3, 6 from 3rd word
|
||||
__m128i p02_mask = _mm_set_epi8(13,12,7,6,1,0,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
|
||||
|
||||
// pick bits 2, 5 from 1st word
|
||||
__m128i p10_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,11,10,5,4);
|
||||
// pick bits 0, 3, 6, from 2st word
|
||||
__m128i p11_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,13,12,7,6,1,0,0xff,0xff,0xff,0xff);
|
||||
// pick bits 1, 4, 7 from 3rd word
|
||||
__m128i p12_mask = _mm_set_epi8(15,14,9,8,3,2,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
|
||||
|
||||
// Split systematic and parity bits
|
||||
for (i = 0; i < long_cb/8; i++) {
|
||||
|
||||
in0 = _mm_load_si128(inputPtr); inputPtr++;
|
||||
in1 = _mm_load_si128(inputPtr); inputPtr++;
|
||||
in2 = _mm_load_si128(inputPtr); inputPtr++;
|
||||
|
||||
/* Deinterleave Systematic bits */
|
||||
s0 = _mm_shuffle_epi8(in0, s0_mask);
|
||||
s1 = _mm_shuffle_epi8(in1, s1_mask);
|
||||
s2 = _mm_shuffle_epi8(in2, s2_mask);
|
||||
s = _mm_or_si128(s0, s1);
|
||||
s = _mm_or_si128(s, s2);
|
||||
|
||||
_mm_store_si128(sysPtr, s);
|
||||
sysPtr++;
|
||||
|
||||
/* Deinterleave parity 0 bits */
|
||||
p00 = _mm_shuffle_epi8(in0, p00_mask);
|
||||
p01 = _mm_shuffle_epi8(in1, p01_mask);
|
||||
p02 = _mm_shuffle_epi8(in2, p02_mask);
|
||||
p0 = _mm_or_si128(p00, p01);
|
||||
p0 = _mm_or_si128(p0, p02);
|
||||
|
||||
_mm_store_si128(pa0Ptr, p0);
|
||||
pa0Ptr++;
|
||||
|
||||
/* Deinterleave parity 1 bits */
|
||||
p10 = _mm_shuffle_epi8(in0, p10_mask);
|
||||
p11 = _mm_shuffle_epi8(in1, p11_mask);
|
||||
p12 = _mm_shuffle_epi8(in2, p12_mask);
|
||||
p1 = _mm_or_si128(p10, p11);
|
||||
p1 = _mm_or_si128(p1, p12);
|
||||
|
||||
_mm_store_si128(pa1Ptr, p1);
|
||||
pa1Ptr++;
|
||||
|
||||
}
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
h->syst[cbidx][i+long_cb] = input[3*long_cb + 2*i];
|
||||
h->parity0[cbidx][i+long_cb] = input[3*long_cb + 2*i + 1];
|
||||
}
|
||||
for (i = 0; i < 3; i++) {
|
||||
h->app2[cbidx][i+long_cb] = input[3*long_cb + 6 + 2*i];
|
||||
h->parity1[cbidx][i+long_cb] = input[3*long_cb + 6 + 2*i + 1];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Runs 1 turbo decoder iteration */
|
||||
void srslte_tdec_simd_iteration(srslte_tdec_simd_t * h, int16_t * input[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb)
|
||||
{
|
||||
|
||||
int16_t *tmp_app[SRSLTE_TDEC_MAX_NPAR];
|
||||
|
||||
if (h->current_cbidx >= 0) {
|
||||
uint16_t *inter = h->interleaver[h->current_cbidx].forward;
|
||||
uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
|
||||
|
||||
#ifndef LV_HAVE_AVX2
|
||||
input[1] = NULL;
|
||||
#endif
|
||||
|
||||
h->cb_mask = (input[0]?1:0) | (input[1]?2:0);
|
||||
|
||||
for (int i=0;i<h->max_par_cb;i++) {
|
||||
if (h->n_iter[i] == 0 && input[i]) {
|
||||
//printf("deinterleaveing %d\n",i);
|
||||
deinterleave_input_simd(h, input[i], i, long_cb);
|
||||
}
|
||||
}
|
||||
|
||||
// Add apriori information to decoder 1
|
||||
for (int i=0;i<h->max_par_cb;i++) {
|
||||
if (h->n_iter[i] > 0 && input[i]) {
|
||||
srslte_vec_sub_sss(h->app1[i], h->ext1[i], h->app1[i], long_cb);
|
||||
}
|
||||
}
|
||||
|
||||
// Run MAP DEC #1
|
||||
for (int i=0;i<h->max_par_cb;i++) {
|
||||
if (input[i]) {
|
||||
tmp_app[i] = h->n_iter[i]?h->app1[i]:NULL;
|
||||
} else {
|
||||
tmp_app[i] = NULL;
|
||||
}
|
||||
}
|
||||
map_simd_dec(&h->dec, h->syst, tmp_app, h->parity0, h->ext1, h->cb_mask, long_cb);
|
||||
|
||||
// Convert aposteriori information into extrinsic information
|
||||
for (int i=0;i<h->max_par_cb;i++) {
|
||||
if (h->n_iter[i] > 0 && input[i]) {
|
||||
srslte_vec_sub_sss(h->ext1[i], h->app1[i], h->ext1[i], long_cb);
|
||||
}
|
||||
}
|
||||
|
||||
// Interleave extrinsic output of DEC1 to form apriori info for decoder 2
|
||||
for (int i=0;i<h->max_par_cb;i++) {
|
||||
if (input[i]) {
|
||||
srslte_vec_lut_sss(h->ext1[i], deinter, h->app2[i], long_cb);
|
||||
}
|
||||
}
|
||||
|
||||
// Run MAP DEC #2. 2nd decoder uses apriori information as systematic bits
|
||||
map_simd_dec(&h->dec, h->app2, NULL, h->parity1, h->ext2, h->cb_mask, long_cb);
|
||||
|
||||
// Deinterleaved extrinsic bits become apriori info for decoder 1
|
||||
for (int i=0;i<h->max_par_cb;i++) {
|
||||
if (input[i]) {
|
||||
srslte_vec_lut_sss(h->ext2[i], inter, h->app1[i], long_cb);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i=0;i<h->max_par_cb;i++) {
|
||||
if (input[i]) {
|
||||
h->n_iter[i]++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "Error CB index not set (call srslte_tdec_simd_reset() first\n");
|
||||
}
|
||||
}
|
||||
|
||||
/* Resets the decoder and sets the codeblock length */
|
||||
int srslte_tdec_simd_reset(srslte_tdec_simd_t * h, uint32_t long_cb)
|
||||
{
|
||||
if (long_cb > h->max_long_cb) {
|
||||
fprintf(stderr, "TDEC was initialized for max_long_cb=%d\n",
|
||||
h->max_long_cb);
|
||||
return -1;
|
||||
}
|
||||
for (int i=0;i<h->max_par_cb;i++) {
|
||||
h->n_iter[i] = 0;
|
||||
}
|
||||
h->cb_mask = 0;
|
||||
h->current_cbidx = srslte_cbsegm_cbindex(long_cb);
|
||||
if (h->current_cbidx < 0) {
|
||||
fprintf(stderr, "Invalid CB length %d\n", long_cb);
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int srslte_tdec_simd_reset_cb(srslte_tdec_simd_t * h, uint32_t cb_idx)
|
||||
{
|
||||
h->n_iter[cb_idx] = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int srslte_tdec_simd_get_nof_iterations_cb(srslte_tdec_simd_t * h, uint32_t cb_idx)
|
||||
{
|
||||
return h->n_iter[cb_idx];
|
||||
}
|
||||
|
||||
void tdec_simd_decision(srslte_tdec_simd_t * h, uint8_t *output, uint32_t cbidx, uint32_t long_cb)
|
||||
{
|
||||
__m128i zero = _mm_set1_epi16(0);
|
||||
__m128i lsb_mask = _mm_set1_epi16(1);
|
||||
|
||||
__m128i *appPtr = (__m128i*) h->app1[cbidx];
|
||||
__m128i *outPtr = (__m128i*) output;
|
||||
__m128i ap, out, out0, out1;
|
||||
|
||||
for (uint32_t i = 0; i < long_cb/16; i++) {
|
||||
ap = _mm_load_si128(appPtr); appPtr++;
|
||||
out0 = _mm_and_si128(_mm_cmpgt_epi16(ap, zero), lsb_mask);
|
||||
ap = _mm_load_si128(appPtr); appPtr++;
|
||||
out1 = _mm_and_si128(_mm_cmpgt_epi16(ap, zero), lsb_mask);
|
||||
|
||||
out = _mm_packs_epi16(out0, out1);
|
||||
_mm_store_si128(outPtr, out);
|
||||
outPtr++;
|
||||
}
|
||||
if (long_cb%16) {
|
||||
for (int i=0;i<8;i++) {
|
||||
output[long_cb-8+i] = h->app1[cbidx][long_cb-8+i]>0?1:0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void srslte_tdec_simd_decision(srslte_tdec_simd_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb)
|
||||
{
|
||||
for (int i=0;i<h->max_par_cb;i++) {
|
||||
tdec_simd_decision(h, output[i], i, long_cb);
|
||||
}
|
||||
}
|
||||
|
||||
void srslte_tdec_simd_decision_byte_cb(srslte_tdec_simd_t * h, uint8_t *output, uint32_t cbidx, uint32_t long_cb)
|
||||
{
|
||||
uint8_t mask[8] = {0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1};
|
||||
|
||||
// long_cb is always byte aligned
|
||||
for (uint32_t i = 0; i < long_cb/8; i++) {
|
||||
uint8_t out0 = h->app1[cbidx][8*i+0]>0?mask[0]:0;
|
||||
uint8_t out1 = h->app1[cbidx][8*i+1]>0?mask[1]:0;
|
||||
uint8_t out2 = h->app1[cbidx][8*i+2]>0?mask[2]:0;
|
||||
uint8_t out3 = h->app1[cbidx][8*i+3]>0?mask[3]:0;
|
||||
uint8_t out4 = h->app1[cbidx][8*i+4]>0?mask[4]:0;
|
||||
uint8_t out5 = h->app1[cbidx][8*i+5]>0?mask[5]:0;
|
||||
uint8_t out6 = h->app1[cbidx][8*i+6]>0?mask[6]:0;
|
||||
uint8_t out7 = h->app1[cbidx][8*i+7]>0?mask[7]:0;
|
||||
|
||||
output[i] = out0 | out1 | out2 | out3 | out4 | out5 | out6 | out7;
|
||||
}
|
||||
}
|
||||
|
||||
void srslte_tdec_simd_decision_byte(srslte_tdec_simd_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t long_cb)
|
||||
{
|
||||
for (int i=0;i<h->max_par_cb;i++) {
|
||||
if (output[i]) {
|
||||
srslte_tdec_simd_decision_byte_cb(h, output[i], i, long_cb);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Runs nof_iterations iterations and decides the output bits */
|
||||
int srslte_tdec_simd_run_all(srslte_tdec_simd_t * h, int16_t * input[SRSLTE_TDEC_MAX_NPAR], uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
|
||||
uint32_t nof_iterations, uint32_t long_cb)
|
||||
{
|
||||
if (srslte_tdec_simd_reset(h, long_cb)) {
|
||||
return SRSLTE_ERROR;
|
||||
}
|
||||
|
||||
do {
|
||||
srslte_tdec_simd_iteration(h, input, long_cb);
|
||||
} while (h->n_iter[0] < nof_iterations);
|
||||
|
||||
srslte_tdec_simd_decision_byte(h, output, long_cb);
|
||||
|
||||
return SRSLTE_SUCCESS;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -1,299 +0,0 @@
|
|||
/**
|
||||
*
|
||||
* \section COPYRIGHT
|
||||
*
|
||||
* Copyright 2013-2015 Software Radio Systems Limited
|
||||
*
|
||||
* \section LICENSE
|
||||
*
|
||||
* This file is part of the srsLTE library.
|
||||
*
|
||||
* srsLTE is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* srsLTE is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* A copy of the GNU Affero General Public License can be found in
|
||||
* the LICENSE file in the top-level directory of this distribution
|
||||
* and at http://www.gnu.org/licenses/.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "srslte/phy/fec/turbodecoder_simd_inter.h"
|
||||
#include "srslte/phy/utils/vector.h"
|
||||
|
||||
#define TOTALTAIL 12
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <smmintrin.h>
|
||||
|
||||
void map_see_inter_alpha(srslte_tdec_simd_inter_t * s, int16_t *input, int16_t *parity, uint32_t long_cb);
|
||||
void map_sse_inter_beta(srslte_tdec_simd_inter_t * s, int16_t *input, int16_t *parity, int16_t * output, uint32_t long_cb);
|
||||
void sse_inter_update_w(srslte_tdec_simd_inter_t *h, uint16_t *deinter, uint32_t long_cb);
|
||||
void sse_inter_extract_syst1(srslte_tdec_simd_inter_t *h, uint16_t *inter, uint32_t long_cb);
|
||||
|
||||
|
||||
static void map_sse_inter_dec(srslte_tdec_simd_inter_t * h, int16_t * input, int16_t * parity, int16_t * output,
|
||||
uint32_t long_cb)
|
||||
{
|
||||
map_see_inter_alpha(h, input, parity, long_cb);
|
||||
map_sse_inter_beta(h, input, parity, output, long_cb);
|
||||
}
|
||||
|
||||
/************************************************
|
||||
*
|
||||
* TURBO DECODER INTERFACE
|
||||
*
|
||||
************************************************/
|
||||
int srslte_tdec_simd_inter_init(srslte_tdec_simd_inter_t * h, uint32_t max_par_cb, uint32_t max_long_cb)
|
||||
{
|
||||
int ret = -1;
|
||||
bzero(h, sizeof(srslte_tdec_simd_inter_t));
|
||||
uint32_t len = max_long_cb + 12;
|
||||
|
||||
h->max_long_cb = max_long_cb;
|
||||
h->max_par_cb = max_par_cb;
|
||||
|
||||
h->llr1 = srslte_vec_malloc(sizeof(int16_t) * len * h->max_par_cb);
|
||||
if (!h->llr1) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->llr2 = srslte_vec_malloc(sizeof(int16_t) * len * h->max_par_cb);
|
||||
if (!h->llr2) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->w = srslte_vec_malloc(sizeof(int16_t) * len * h->max_par_cb);
|
||||
if (!h->w) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->syst0 = srslte_vec_malloc(sizeof(int16_t) * len * h->max_par_cb);
|
||||
if (!h->syst0) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->syst1 = srslte_vec_malloc(sizeof(int16_t) * len * h->max_par_cb);
|
||||
if (!h->syst1) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->parity0 = srslte_vec_malloc(sizeof(int16_t) * len * h->max_par_cb);
|
||||
if (!h->parity0) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->parity1 = srslte_vec_malloc(sizeof(int16_t) * len * h->max_par_cb);
|
||||
if (!h->parity1) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
h->alpha = srslte_vec_malloc(sizeof(int16_t) * 8*(len+12) * h->max_par_cb);
|
||||
if (!h->alpha) {
|
||||
perror("srslte_vec_malloc");
|
||||
goto clean_and_exit;
|
||||
}
|
||||
|
||||
for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
|
||||
if (srslte_tc_interl_init(&h->interleaver[i], srslte_cbsegm_cbsize(i)) < 0) {
|
||||
goto clean_and_exit;
|
||||
}
|
||||
srslte_tc_interl_LTE_gen(&h->interleaver[i], srslte_cbsegm_cbsize(i));
|
||||
}
|
||||
h->current_cbidx = -1;
|
||||
ret = 0;
|
||||
clean_and_exit:if (ret == -1) {
|
||||
srslte_tdec_simd_inter_free(h);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void srslte_tdec_simd_inter_free(srslte_tdec_simd_inter_t * h)
|
||||
{
|
||||
if (h->llr1) {
|
||||
free(h->llr1);
|
||||
}
|
||||
if (h->llr2) {
|
||||
free(h->llr2);
|
||||
}
|
||||
if (h->w) {
|
||||
free(h->w);
|
||||
}
|
||||
if (h->syst0) {
|
||||
free(h->syst0);
|
||||
}
|
||||
if (h->syst1) {
|
||||
free(h->syst1);
|
||||
}
|
||||
if (h->parity0) {
|
||||
free(h->parity0);
|
||||
}
|
||||
if (h->parity1) {
|
||||
free(h->parity1);
|
||||
}
|
||||
if (h->alpha) {
|
||||
free(h->alpha);
|
||||
}
|
||||
|
||||
for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
|
||||
srslte_tc_interl_free(&h->interleaver[i]);
|
||||
}
|
||||
|
||||
bzero(h, sizeof(srslte_tdec_simd_inter_t));
|
||||
}
|
||||
|
||||
|
||||
/* Deinterleave for inter-frame parallelization */
|
||||
void extract_input(srslte_tdec_simd_inter_t *h, int16_t *input, uint32_t cbidx, uint32_t long_cb)
|
||||
{
|
||||
for (int i=0;i<long_cb;i++) {
|
||||
h->syst0[h->max_par_cb*i+cbidx] = input[3*i+0];
|
||||
h->parity0[h->max_par_cb*i+cbidx] = input[3*i+1];
|
||||
h->parity1[h->max_par_cb*i+cbidx] = input[3*i+2];
|
||||
}
|
||||
for (int i = long_cb; i < long_cb + 3; i++) {
|
||||
h->syst0[h->max_par_cb*i+cbidx] = input[3*long_cb + 2*(i - long_cb)];
|
||||
h->syst1[h->max_par_cb*i+cbidx] = input[3*long_cb + 2*(i - long_cb)];
|
||||
h->parity0[h->max_par_cb*i+cbidx] = input[3*long_cb + 2*(i - long_cb) + 1];
|
||||
h->parity0[h->max_par_cb*i+cbidx] = input[3*long_cb + 2*(i - long_cb) + 2];
|
||||
}
|
||||
}
|
||||
|
||||
void srslte_tdec_simd_inter_iteration(srslte_tdec_simd_inter_t * h, int16_t *input[SRSLTE_TDEC_MAX_NPAR], uint32_t nof_cb, uint32_t long_cb)
|
||||
{
|
||||
|
||||
if (h->current_cbidx >= 0) {
|
||||
|
||||
uint16_t *inter = h->interleaver[h->current_cbidx].forward;
|
||||
uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
|
||||
|
||||
// Prepare systematic and parity bits for MAP DEC #1
|
||||
for (int i=0;i<nof_cb;i++) {
|
||||
if (h->n_iter[i] == 0) {
|
||||
extract_input(h, input[i], i, long_cb);
|
||||
}
|
||||
srslte_vec_sum_sss(h->syst0, h->w, h->syst0, long_cb*h->max_par_cb);
|
||||
}
|
||||
|
||||
// Run MAP DEC #1
|
||||
map_sse_inter_dec(h, h->syst0, h->parity0, h->llr1, long_cb);
|
||||
|
||||
// Prepare systematic and parity bits for MAP DEC #1
|
||||
sse_inter_extract_syst1(h, inter, long_cb);
|
||||
|
||||
// Run MAP DEC #2
|
||||
map_sse_inter_dec(h, h->syst1, h->parity1, h->llr2, long_cb);
|
||||
|
||||
// Update a-priori LLR from the last iteration
|
||||
sse_inter_update_w(h, deinter, long_cb);
|
||||
|
||||
} else {
|
||||
fprintf(stderr, "Error CB index not set (call srslte_tdec_simd_inter_reset() first\n");
|
||||
}
|
||||
}
|
||||
|
||||
int srslte_tdec_simd_inter_reset_cb(srslte_tdec_simd_inter_t * h, uint32_t cb_idx)
|
||||
{
|
||||
for (int i=0;i<h->current_long_cb;i++) {
|
||||
h->w[h->max_par_cb*i+cb_idx] = 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int srslte_tdec_simd_inter_reset(srslte_tdec_simd_inter_t * h, uint32_t long_cb)
|
||||
{
|
||||
if (long_cb > h->max_long_cb) {
|
||||
fprintf(stderr, "TDEC was initialized for max_long_cb=%d\n",
|
||||
h->max_long_cb);
|
||||
return -1;
|
||||
}
|
||||
h->current_long_cb = long_cb;
|
||||
h->current_cbidx = srslte_cbsegm_cbindex(long_cb);
|
||||
if (h->current_cbidx < 0) {
|
||||
fprintf(stderr, "Invalid CB length %d\n", long_cb);
|
||||
return -1;
|
||||
}
|
||||
memset(h->w, 0, sizeof(int16_t) * long_cb * h->max_par_cb);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void srslte_tdec_simd_inter_decision_cb(srslte_tdec_simd_inter_t * h, uint8_t *output, uint32_t cb_idx, uint32_t long_cb)
|
||||
{
|
||||
uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
|
||||
uint32_t i;
|
||||
for (i = 0; i < long_cb; i++) {
|
||||
output[i] = (h->llr2[h->max_par_cb*deinter[i]+cb_idx] > 0) ? 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
void srslte_tdec_simd_inter_decision(srslte_tdec_simd_inter_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t nof_cb, uint32_t long_cb)
|
||||
{
|
||||
for (int i=0;i<nof_cb;i++) {
|
||||
srslte_tdec_simd_inter_decision_cb(h, output[i], i, long_cb);
|
||||
}
|
||||
}
|
||||
|
||||
void srslte_tdec_simd_inter_decision_byte_cb(srslte_tdec_simd_inter_t * h, uint8_t *output, uint32_t cb_idx, uint32_t long_cb)
|
||||
{
|
||||
uint32_t i;
|
||||
uint8_t mask[8] = {0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1};
|
||||
uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
|
||||
|
||||
#define indexOf_cb(idx, cb) (h->max_par_cb*(deinter[8*i+idx])+cb)
|
||||
|
||||
// long_cb is always byte aligned
|
||||
for (i = 0; i < long_cb/8; i++) {
|
||||
uint8_t out0 = h->llr2[indexOf_cb(0, cb_idx)]>0?mask[0]:0;
|
||||
uint8_t out1 = h->llr2[indexOf_cb(1, cb_idx)]>0?mask[1]:0;
|
||||
uint8_t out2 = h->llr2[indexOf_cb(2, cb_idx)]>0?mask[2]:0;
|
||||
uint8_t out3 = h->llr2[indexOf_cb(3, cb_idx)]>0?mask[3]:0;
|
||||
uint8_t out4 = h->llr2[indexOf_cb(4, cb_idx)]>0?mask[4]:0;
|
||||
uint8_t out5 = h->llr2[indexOf_cb(5, cb_idx)]>0?mask[5]:0;
|
||||
uint8_t out6 = h->llr2[indexOf_cb(6, cb_idx)]>0?mask[6]:0;
|
||||
uint8_t out7 = h->llr2[indexOf_cb(7, cb_idx)]>0?mask[7]:0;
|
||||
|
||||
output[i] = out0 | out1 | out2 | out3 | out4 | out5 | out6 | out7;
|
||||
}
|
||||
}
|
||||
|
||||
void srslte_tdec_simd_inter_decision_byte(srslte_tdec_simd_inter_t * h, uint8_t *output[SRSLTE_TDEC_MAX_NPAR], uint32_t nof_cb, uint32_t long_cb)
|
||||
{
|
||||
for (int i=0;i<nof_cb;i++) {
|
||||
srslte_tdec_simd_inter_decision_byte_cb(h, output[i], i, long_cb);
|
||||
}
|
||||
}
|
||||
|
||||
int srslte_tdec_simd_inter_run_all(srslte_tdec_simd_inter_t * h,
|
||||
int16_t *input[SRSLTE_TDEC_MAX_NPAR], uint8_t *output[SRSLTE_TDEC_MAX_NPAR],
|
||||
uint32_t nof_iterations, uint32_t nof_cb, uint32_t long_cb)
|
||||
{
|
||||
uint32_t iter = 0;
|
||||
|
||||
if (srslte_tdec_simd_inter_reset(h, long_cb)) {
|
||||
return SRSLTE_ERROR;
|
||||
}
|
||||
|
||||
do {
|
||||
srslte_tdec_simd_inter_iteration(h, input, nof_cb, long_cb);
|
||||
iter++;
|
||||
} while (iter < nof_iterations);
|
||||
|
||||
srslte_tdec_simd_inter_decision_byte(h, output, nof_cb, long_cb);
|
||||
|
||||
return SRSLTE_SUCCESS;
|
||||
}
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -1,202 +0,0 @@
|
|||
/**
|
||||
*
|
||||
* \section COPYRIGHT
|
||||
*
|
||||
* Copyright 2013-2015 Software Radio Systems Limited
|
||||
*
|
||||
* \section LICENSE
|
||||
*
|
||||
* This file is part of the srsLTE library.
|
||||
*
|
||||
* srsLTE is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of
|
||||
* the License, or (at your option) any later version.
|
||||
*
|
||||
* srsLTE is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* A copy of the GNU Affero General Public License can be found in
|
||||
* the LICENSE file in the top-level directory of this distribution
|
||||
* and at http://www.gnu.org/licenses/.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "srslte/phy/fec/turbodecoder_simd_inter.h"
|
||||
#include "srslte/phy/utils/vector.h"
|
||||
|
||||
|
||||
#define NCB 8
|
||||
|
||||
#define INF 10000
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <smmintrin.h>
|
||||
|
||||
void sse_inter_extract_syst1(srslte_tdec_simd_inter_t *h, uint16_t *inter, uint32_t long_cb)
|
||||
{
|
||||
__m128i *llr1Ptr = (__m128i*) h->llr1;
|
||||
__m128i *wPtr = (__m128i*) h->w;
|
||||
__m128i *syst1Ptr = (__m128i*) h->syst1;
|
||||
|
||||
for (int i = 0; i < long_cb; i++) {
|
||||
__m128i llr1 = _mm_load_si128(&llr1Ptr[inter[i]]);
|
||||
__m128i w = _mm_load_si128(&wPtr[inter[i]]);
|
||||
_mm_store_si128(syst1Ptr++, _mm_sub_epi16(llr1, w));
|
||||
}
|
||||
}
|
||||
|
||||
void sse_inter_update_w(srslte_tdec_simd_inter_t *h, uint16_t *deinter, uint32_t long_cb)
|
||||
{
|
||||
__m128i *llr1Ptr = (__m128i*) h->llr1;
|
||||
__m128i *llr2Ptr = (__m128i*) h->llr2;
|
||||
__m128i *wPtr = (__m128i*) h->w;
|
||||
__m128i *syst1Ptr = (__m128i*) h->syst1;
|
||||
|
||||
for (int i = 0; i < long_cb; i++) {
|
||||
__m128i llr1 = _mm_load_si128(llr1Ptr++);
|
||||
__m128i w = _mm_load_si128(wPtr++);
|
||||
__m128i llr2 = _mm_load_si128(&llr2Ptr[deinter[i]]);
|
||||
|
||||
_mm_store_si128(syst1Ptr++, _mm_add_epi16(w, _mm_sub_epi16(llr2, llr1)));
|
||||
}
|
||||
}
|
||||
|
||||
/* Computes beta values */
|
||||
void map_sse_inter_beta(srslte_tdec_simd_inter_t * s, int16_t *input, int16_t *parity, int16_t * output, uint32_t long_cb)
|
||||
{
|
||||
__m128i m_b[8], new[8], old[8], max1[8], max0[8];
|
||||
__m128i x, y, xy;
|
||||
__m128i m1, m0;
|
||||
uint32_t end = long_cb + 3;
|
||||
uint32_t i;
|
||||
|
||||
__m128i *inputPtr = (__m128i*) input;
|
||||
__m128i *parityPtr = (__m128i*) parity;
|
||||
__m128i *outputPtr = (__m128i*) output;
|
||||
__m128i *alphaPtr = (__m128i*) s->alpha;
|
||||
|
||||
for (int i = 0; i < 8; i++) {
|
||||
old[i] = _mm_set1_epi16(0);
|
||||
}
|
||||
|
||||
for (int k = end - 1; k >= 0; k--) {
|
||||
x = _mm_load_si128(inputPtr++);
|
||||
y = _mm_load_si128(parityPtr++);
|
||||
|
||||
xy = _mm_add_epi16(x,y);
|
||||
|
||||
m_b[0] = _mm_add_epi16(old[4], xy);
|
||||
m_b[1] = old[4];
|
||||
m_b[2] = _mm_add_epi16(old[5], y);
|
||||
m_b[3] = _mm_add_epi16(old[5], x);
|
||||
m_b[4] = _mm_add_epi16(old[6], x);
|
||||
m_b[5] = _mm_add_epi16(old[6], y);
|
||||
m_b[6] = old[7];
|
||||
m_b[7] = _mm_add_epi16(old[7], xy);
|
||||
|
||||
new[0] = old[0];
|
||||
new[1] = _mm_add_epi16(old[0], xy);
|
||||
new[2] = _mm_add_epi16(old[1], x);
|
||||
new[3] = _mm_add_epi16(old[1], y);
|
||||
new[4] = _mm_add_epi16(old[2], y);
|
||||
new[5] = _mm_add_epi16(old[2], x);
|
||||
new[6] = _mm_add_epi16(old[3], xy);
|
||||
new[7] = old[3];
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
__m128i alpha = _mm_load_si128(alphaPtr++);
|
||||
max0[i] = _mm_add_epi16(alpha, m_b[i]);
|
||||
max1[i] = _mm_add_epi16(alpha, new[i]);
|
||||
}
|
||||
|
||||
m1 = _mm_max_epi16(max1[0], max1[1]);
|
||||
m0 = _mm_max_epi16(max0[0], max0[1]);
|
||||
|
||||
for (i = 2; i < 8; i++) {
|
||||
m1 = _mm_max_epi16(m1, max1[i]);
|
||||
m0 = _mm_max_epi16(m0, max0[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
new[i] = _mm_max_epi16(m_b[i], new[i]);
|
||||
old[i] = new[i];
|
||||
}
|
||||
|
||||
__m128i out = _mm_sub_epi16(m1, m0);
|
||||
_mm_store_si128(outputPtr++, out);
|
||||
|
||||
// normalize
|
||||
if ((k%4)==0) {
|
||||
for (int i=1;i<8;i++) {
|
||||
_mm_sub_epi16(old[i], old[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Computes alpha metrics */
|
||||
void map_see_inter_alpha(srslte_tdec_simd_inter_t * s, int16_t *input, int16_t *parity, uint32_t long_cb)
|
||||
{
|
||||
__m128i m_b[8], new[8], old[8];
|
||||
__m128i x, y, xy;
|
||||
uint32_t k;
|
||||
|
||||
__m128i *inputPtr = (__m128i*) input;
|
||||
__m128i *parityPtr = (__m128i*) parity;
|
||||
__m128i *alphaPtr = (__m128i*) s->alpha;
|
||||
|
||||
old[0] = _mm_set1_epi16(0);
|
||||
for (int i = 1; i < 8; i++) {
|
||||
old[i] = _mm_set1_epi16(-INF);
|
||||
}
|
||||
|
||||
for (k = 0; k < long_cb; k++) {
|
||||
x = _mm_load_si128(inputPtr++);
|
||||
y = _mm_load_si128(parityPtr++);
|
||||
|
||||
xy = _mm_add_epi16(x,y);
|
||||
|
||||
m_b[0] = old[0];
|
||||
m_b[1] = _mm_add_epi16(old[3], y);
|
||||
m_b[2] = _mm_add_epi16(old[4], y);
|
||||
m_b[3] = old[7];
|
||||
m_b[4] = old[1];
|
||||
m_b[5] = _mm_add_epi16(old[2], y);
|
||||
m_b[6] = _mm_add_epi16(old[5], y);
|
||||
m_b[7] = old[6];
|
||||
|
||||
new[0] = _mm_add_epi16(old[1], xy);
|
||||
new[1] = _mm_add_epi16(old[2], x);
|
||||
new[2] = _mm_add_epi16(old[5], x);
|
||||
new[3] = _mm_add_epi16(old[6], xy);
|
||||
new[4] = _mm_add_epi16(old[0], xy);
|
||||
new[5] = _mm_add_epi16(old[3], x);
|
||||
new[6] = _mm_add_epi16(old[4], x);
|
||||
new[7] = _mm_add_epi16(old[7], xy);
|
||||
|
||||
for (int i = 0; i < 8; i++) {
|
||||
new[i] = _mm_max_epi16(m_b[i], new[i]);
|
||||
old[i] = new[i];
|
||||
_mm_store_si128(alphaPtr++, old[i]);
|
||||
}
|
||||
|
||||
// normalize
|
||||
if ((k%4)==0) {
|
||||
for (int i=1;i<8;i++) {
|
||||
_mm_sub_epi16(old[i], old[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
|
@ -32,18 +32,26 @@
|
|||
#include "srslte/phy/utils/bit.h"
|
||||
#include "srslte/phy/modem/demod_soft.h"
|
||||
|
||||
// AVX implementation not useful for integers. Wait for AVX2
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <smmintrin.h>
|
||||
void demod_16qam_lte_s_sse(const cf_t *symbols, short *llr, int nsymbols);
|
||||
#endif
|
||||
|
||||
|
||||
#define SCALE_SHORT_CONV_QPSK 100
|
||||
#define SCALE_SHORT_CONV_QAM16 400
|
||||
#define SCALE_SHORT_CONV_QAM64 700
|
||||
|
||||
#define SCALE_BYTE_CONV_QPSK 20
|
||||
#define SCALE_BYTE_CONV_QAM16 30
|
||||
#define SCALE_BYTE_CONV_QAM64 40
|
||||
|
||||
void demod_bpsk_lte_b(const cf_t *symbols, int8_t *llr, int nsymbols) {
|
||||
for (int i=0;i<nsymbols;i++) {
|
||||
llr[i] = (int8_t) -SCALE_BYTE_CONV_QPSK*(crealf(symbols[i]) + cimagf(symbols[i]))/sqrt(2);
|
||||
}
|
||||
}
|
||||
|
||||
void demod_bpsk_lte_s(const cf_t *symbols, short *llr, int nsymbols) {
|
||||
for (int i=0;i<nsymbols;i++) {
|
||||
llr[i] = (short) -SCALE_SHORT_CONV_QPSK*(crealf(symbols[i]) + cimagf(symbols[i]))/sqrt(2);
|
||||
|
@ -56,6 +64,10 @@ void demod_bpsk_lte(const cf_t *symbols, float *llr, int nsymbols) {
|
|||
}
|
||||
}
|
||||
|
||||
void demod_qpsk_lte_b(const cf_t *symbols, int8_t *llr, int nsymbols) {
|
||||
srslte_vec_convert_fb((const float*) symbols, -SCALE_BYTE_CONV_QPSK*sqrt(2), llr, nsymbols*2);
|
||||
}
|
||||
|
||||
void demod_qpsk_lte_s(const cf_t *symbols, short *llr, int nsymbols) {
|
||||
srslte_vec_convert_fi((const float*) symbols, -SCALE_SHORT_CONV_QPSK*sqrt(2), llr, nsymbols*2);
|
||||
}
|
||||
|
@ -87,9 +99,11 @@ void demod_16qam_lte_s_sse(const cf_t *symbols, short *llr, int nsymbols) {
|
|||
__m128i result11, result12, result22, result21;
|
||||
__m128 scale_v = _mm_set1_ps(-SCALE_SHORT_CONV_QAM16);
|
||||
__m128i shuffle_negated_1 = _mm_set_epi8(0xff,0xff,0xff,0xff,7,6,5,4,0xff,0xff,0xff,0xff,3,2,1,0);
|
||||
__m128i shuffle_negated_2 = _mm_set_epi8(0xff,0xff,0xff,0xff,15,14,13,12,0xff,0xff,0xff,0xff,11,10,9,8);
|
||||
__m128i shuffle_abs_1 = _mm_set_epi8(7,6,5,4,0xff,0xff,0xff,0xff,3,2,1,0,0xff,0xff,0xff,0xff);
|
||||
|
||||
__m128i shuffle_negated_2 = _mm_set_epi8(0xff,0xff,0xff,0xff,15,14,13,12,0xff,0xff,0xff,0xff,11,10,9,8);
|
||||
__m128i shuffle_abs_2 = _mm_set_epi8(15,14,13,12,0xff,0xff,0xff,0xff,11,10,9,8,0xff,0xff,0xff,0xff);
|
||||
|
||||
for (int i=0;i<nsymbols/4;i++) {
|
||||
symbol1 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
|
||||
symbol2 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
|
||||
|
@ -120,6 +134,60 @@ void demod_16qam_lte_s_sse(const cf_t *symbols, short *llr, int nsymbols) {
|
|||
llr[4*i+3] = abs(yim)-2*SCALE_SHORT_CONV_QAM16/sqrt(10);
|
||||
}
|
||||
}
|
||||
|
||||
void demod_16qam_lte_b_sse(const cf_t *symbols, int8_t *llr, int nsymbols) {
|
||||
float *symbolsPtr = (float*) symbols;
|
||||
__m128i *resultPtr = (__m128i*) llr;
|
||||
__m128 symbol1, symbol2, symbol3, symbol4;
|
||||
__m128i symbol_i1, symbol_i2, symbol_i3, symbol_i4, symbol_i, symbol_abs, symbol_12, symbol_34;
|
||||
__m128i offset = _mm_set1_epi8(2*SCALE_BYTE_CONV_QAM16/sqrt(10));
|
||||
__m128i result1n, result1a, result2n, result2a;
|
||||
__m128 scale_v = _mm_set1_ps(-SCALE_BYTE_CONV_QAM16);
|
||||
|
||||
__m128i shuffle_negated_1 = _mm_set_epi8(0xff,0xff,7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0);
|
||||
__m128i shuffle_abs_1 = _mm_set_epi8(7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0,0xff,0xff);
|
||||
|
||||
__m128i shuffle_negated_2 = _mm_set_epi8(0xff,0xff,15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8);
|
||||
__m128i shuffle_abs_2 = _mm_set_epi8(15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8,0xff,0xff);
|
||||
|
||||
for (int i=0;i<nsymbols/8;i++) {
|
||||
symbol1 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
|
||||
symbol2 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
|
||||
symbol3 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
|
||||
symbol4 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
|
||||
symbol_i1 = _mm_cvtps_epi32(_mm_mul_ps(symbol1, scale_v));
|
||||
symbol_i2 = _mm_cvtps_epi32(_mm_mul_ps(symbol2, scale_v));
|
||||
symbol_i3 = _mm_cvtps_epi32(_mm_mul_ps(symbol3, scale_v));
|
||||
symbol_i4 = _mm_cvtps_epi32(_mm_mul_ps(symbol4, scale_v));
|
||||
symbol_12 = _mm_packs_epi32(symbol_i1, symbol_i2);
|
||||
symbol_34 = _mm_packs_epi32(symbol_i3, symbol_i4);
|
||||
symbol_i = _mm_packs_epi16(symbol_12, symbol_34);
|
||||
|
||||
symbol_abs = _mm_abs_epi8(symbol_i);
|
||||
symbol_abs = _mm_sub_epi8(symbol_abs, offset);
|
||||
|
||||
result1n = _mm_shuffle_epi8(symbol_i, shuffle_negated_1);
|
||||
result1a = _mm_shuffle_epi8(symbol_abs, shuffle_abs_1);
|
||||
|
||||
result2n = _mm_shuffle_epi8(symbol_i, shuffle_negated_2);
|
||||
result2a = _mm_shuffle_epi8(symbol_abs, shuffle_abs_2);
|
||||
|
||||
_mm_store_si128(resultPtr, _mm_or_si128(result1n, result1a)); resultPtr++;
|
||||
_mm_store_si128(resultPtr, _mm_or_si128(result2n, result2a)); resultPtr++;
|
||||
|
||||
}
|
||||
// Demodulate last symbols
|
||||
for (int i=8*(nsymbols/8);i<nsymbols;i++) {
|
||||
short yre = (int8_t) (SCALE_BYTE_CONV_QAM16*crealf(symbols[i]));
|
||||
short yim = (int8_t) (SCALE_BYTE_CONV_QAM16*cimagf(symbols[i]));
|
||||
|
||||
llr[4*i+0] = -yre;
|
||||
llr[4*i+1] = -yim;
|
||||
llr[4*i+2] = abs(yre)-2*SCALE_BYTE_CONV_QAM16/sqrt(10);
|
||||
llr[4*i+3] = abs(yim)-2*SCALE_BYTE_CONV_QAM16/sqrt(10);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void demod_16qam_lte_s(const cf_t *symbols, short *llr, int nsymbols) {
|
||||
|
@ -138,6 +206,22 @@ void demod_16qam_lte_s(const cf_t *symbols, short *llr, int nsymbols) {
|
|||
#endif
|
||||
}
|
||||
|
||||
void demod_16qam_lte_b(const cf_t *symbols, int8_t *llr, int nsymbols) {
|
||||
#ifdef LV_HAVE_SSE
|
||||
demod_16qam_lte_b_sse(symbols, llr, nsymbols);
|
||||
#else
|
||||
for (int i=0;i<nsymbols;i++) {
|
||||
int8_t yre = (int8_t) (SCALE_BYTE_CONV_QAM16*crealf(symbols[i]));
|
||||
int8_t yim = (int8_t) (SCALE_BYTE_CONV_QAM16*cimagf(symbols[i]));
|
||||
|
||||
llr[4*i+0] = -yre;
|
||||
llr[4*i+1] = -yim;
|
||||
llr[4*i+2] = abs(yre)-2*SCALE_BYTE_CONV_QAM16/sqrt(10);
|
||||
llr[4*i+3] = abs(yim)-2*SCALE_BYTE_CONV_QAM16/sqrt(10);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void demod_64qam_lte(const cf_t *symbols, float *llr, int nsymbols)
|
||||
{
|
||||
for (int i=0;i<nsymbols;i++) {
|
||||
|
@ -218,6 +302,76 @@ void demod_64qam_lte_s_sse(const cf_t *symbols, short *llr, int nsymbols)
|
|||
llr[6*i+5] = abs(llr[6*i+3])-2*SCALE_SHORT_CONV_QAM64/sqrt(42);
|
||||
}
|
||||
}
|
||||
|
||||
void demod_64qam_lte_b_sse(const cf_t *symbols, int8_t *llr, int nsymbols)
|
||||
{
|
||||
float *symbolsPtr = (float*) symbols;
|
||||
__m128i *resultPtr = (__m128i*) llr;
|
||||
__m128 symbol1, symbol2, symbol3, symbol4;
|
||||
__m128i symbol_i1, symbol_i2, symbol_i3, symbol_i4, symbol_i, symbol_abs, symbol_abs2,symbol_12, symbol_34;
|
||||
__m128i offset1 = _mm_set1_epi8(4*SCALE_BYTE_CONV_QAM64/sqrt(42));
|
||||
__m128i offset2 = _mm_set1_epi8(2*SCALE_BYTE_CONV_QAM64/sqrt(42));
|
||||
__m128 scale_v = _mm_set1_ps(-SCALE_BYTE_CONV_QAM64);
|
||||
__m128i result11, result12, result13, result22, result21,result23, result31, result32, result33;
|
||||
|
||||
__m128i shuffle_negated_1 = _mm_set_epi8(0xff,0xff,5,4,0xff,0xff,0xff,0xff,3,2,0xff,0xff,0xff,0xff,1,0);
|
||||
__m128i shuffle_negated_2 = _mm_set_epi8(11,10,0xff,0xff,0xff,0xff,9,8,0xff,0xff,0xff,0xff,7,6,0xff,0xff);
|
||||
__m128i shuffle_negated_3 = _mm_set_epi8(0xff,0xff,0xff,0xff,15,14,0xff,0xff,0xff,0xff,13,12,0xff,0xff,0xff,0xff);
|
||||
|
||||
__m128i shuffle_abs_1 = _mm_set_epi8(5,4,0xff,0xff,0xff,0xff,3,2,0xff,0xff,0xff,0xff,1,0,0xff,0xff);
|
||||
__m128i shuffle_abs_2 = _mm_set_epi8(0xff,0xff,0xff,0xff,9,8,0xff,0xff,0xff,0xff,7,6,0xff,0xff,0xff,0xff);
|
||||
__m128i shuffle_abs_3 = _mm_set_epi8(0xff,0xff,15,14,0xff,0xff,0xff,0xff,13,12,0xff,0xff,0xff,0xff,11,10);
|
||||
|
||||
__m128i shuffle_abs2_1 = _mm_set_epi8(0xff,0xff,0xff,0xff,3,2,0xff,0xff,0xff,0xff,1,0,0xff,0xff,0xff,0xff);
|
||||
__m128i shuffle_abs2_2 = _mm_set_epi8(0xff,0xff,9,8,0xff,0xff,0xff,0xff,7,6,0xff,0xff,0xff,0xff,5,4);
|
||||
__m128i shuffle_abs2_3 = _mm_set_epi8(15,14,0xff,0xff,0xff,0xff,13,12,0xff,0xff,0xff,0xff,11,10,0xff,0xff);
|
||||
|
||||
for (int i=0;i<nsymbols/8;i++) {
|
||||
symbol1 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
|
||||
symbol2 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
|
||||
symbol3 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
|
||||
symbol4 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
|
||||
symbol_i1 = _mm_cvtps_epi32(_mm_mul_ps(symbol1, scale_v));
|
||||
symbol_i2 = _mm_cvtps_epi32(_mm_mul_ps(symbol2, scale_v));
|
||||
symbol_i3 = _mm_cvtps_epi32(_mm_mul_ps(symbol3, scale_v));
|
||||
symbol_i4 = _mm_cvtps_epi32(_mm_mul_ps(symbol4, scale_v));
|
||||
symbol_12 = _mm_packs_epi32(symbol_i1, symbol_i2);
|
||||
symbol_34 = _mm_packs_epi32(symbol_i3, symbol_i4);
|
||||
symbol_i = _mm_packs_epi16(symbol_12, symbol_34);
|
||||
|
||||
symbol_abs = _mm_abs_epi8(symbol_i);
|
||||
symbol_abs = _mm_sub_epi8(symbol_abs, offset1);
|
||||
symbol_abs2 = _mm_sub_epi8(_mm_abs_epi8(symbol_abs), offset2);
|
||||
|
||||
result11 = _mm_shuffle_epi8(symbol_i, shuffle_negated_1);
|
||||
result12 = _mm_shuffle_epi8(symbol_abs, shuffle_abs_1);
|
||||
result13 = _mm_shuffle_epi8(symbol_abs2, shuffle_abs2_1);
|
||||
|
||||
result21 = _mm_shuffle_epi8(symbol_i, shuffle_negated_2);
|
||||
result22 = _mm_shuffle_epi8(symbol_abs, shuffle_abs_2);
|
||||
result23 = _mm_shuffle_epi8(symbol_abs2, shuffle_abs2_2);
|
||||
|
||||
result31 = _mm_shuffle_epi8(symbol_i, shuffle_negated_3);
|
||||
result32 = _mm_shuffle_epi8(symbol_abs, shuffle_abs_3);
|
||||
result33 = _mm_shuffle_epi8(symbol_abs2, shuffle_abs2_3);
|
||||
|
||||
_mm_store_si128(resultPtr, _mm_or_si128(_mm_or_si128(result11, result12),result13)); resultPtr++;
|
||||
_mm_store_si128(resultPtr, _mm_or_si128(_mm_or_si128(result21, result22),result23)); resultPtr++;
|
||||
_mm_store_si128(resultPtr, _mm_or_si128(_mm_or_si128(result31, result32),result33)); resultPtr++;
|
||||
|
||||
}
|
||||
for (int i=8*(nsymbols/8);i<nsymbols;i++) {
|
||||
float yre = (int8_t) (SCALE_BYTE_CONV_QAM64*crealf(symbols[i]));
|
||||
float yim = (int8_t) (SCALE_BYTE_CONV_QAM64*cimagf(symbols[i]));
|
||||
|
||||
llr[6*i+0] = -yre;
|
||||
llr[6*i+1] = -yim;
|
||||
llr[6*i+2] = abs(yre)-4*SCALE_BYTE_CONV_QAM64/sqrt(42);
|
||||
llr[6*i+3] = abs(yim)-4*SCALE_BYTE_CONV_QAM64/sqrt(42);
|
||||
llr[6*i+4] = abs(llr[6*i+2])-2*SCALE_BYTE_CONV_QAM64/sqrt(42);
|
||||
llr[6*i+5] = abs(llr[6*i+3])-2*SCALE_BYTE_CONV_QAM64/sqrt(42);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -240,6 +394,25 @@ void demod_64qam_lte_s(const cf_t *symbols, short *llr, int nsymbols)
|
|||
#endif
|
||||
}
|
||||
|
||||
void demod_64qam_lte_b(const cf_t *symbols, int8_t *llr, int nsymbols)
|
||||
{
|
||||
#ifdef LV_HAVE_SSE
|
||||
demod_64qam_lte_b_sse(symbols, llr, nsymbols);
|
||||
#else
|
||||
for (int i=0;i<nsymbols;i++) {
|
||||
float yre = (int8_t) (SCALE_BYTE_CONV_QAM64*crealf(symbols[i]));
|
||||
float yim = (int8_t) (SCALE_BYTE_CONV_QAM64*cimagf(symbols[i]));
|
||||
|
||||
llr[6*i+0] = -yre;
|
||||
llr[6*i+1] = -yim;
|
||||
llr[6*i+2] = abs(yre)-4*SCALE_BYTE_CONV_QAM64/sqrt(42);
|
||||
llr[6*i+3] = abs(yim)-4*SCALE_BYTE_CONV_QAM64/sqrt(42);
|
||||
llr[6*i+4] = abs(llr[6*i+2])-2*SCALE_BYTE_CONV_QAM64/sqrt(42);
|
||||
llr[6*i+5] = abs(llr[6*i+3])-2*SCALE_BYTE_CONV_QAM64/sqrt(42);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
int srslte_demod_soft_demodulate(srslte_mod_t modulation, const cf_t* symbols, float* llr, int nsymbols) {
|
||||
switch(modulation) {
|
||||
case SRSLTE_MOD_BPSK:
|
||||
|
@ -281,3 +454,24 @@ int srslte_demod_soft_demodulate_s(srslte_mod_t modulation, const cf_t* symbols,
|
|||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int srslte_demod_soft_demodulate_b(srslte_mod_t modulation, const cf_t* symbols, int8_t* llr, int nsymbols) {
|
||||
switch(modulation) {
|
||||
case SRSLTE_MOD_BPSK:
|
||||
demod_bpsk_lte_b(symbols, llr, nsymbols);
|
||||
break;
|
||||
case SRSLTE_MOD_QPSK:
|
||||
demod_qpsk_lte_b(symbols, llr, nsymbols);
|
||||
break;
|
||||
case SRSLTE_MOD_16QAM:
|
||||
demod_16qam_lte_b(symbols, llr, nsymbols);
|
||||
break;
|
||||
case SRSLTE_MOD_64QAM:
|
||||
demod_64qam_lte_b(symbols, llr, nsymbols);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Invalid modulation %d\n", modulation);
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -112,6 +112,7 @@ int main(int argc, char **argv) {
|
|||
cf_t *symbols;
|
||||
float *llr;
|
||||
short *llr_s;
|
||||
int8_t *llr_b;
|
||||
|
||||
parse_args(argc, argv);
|
||||
|
||||
|
@ -153,13 +154,20 @@ int main(int argc, char **argv) {
|
|||
exit(-1);
|
||||
}
|
||||
|
||||
llr_b = srslte_vec_malloc(sizeof(int8_t) * num_bits);
|
||||
if (!llr_b) {
|
||||
perror("malloc");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
/* generate random data */
|
||||
srand(0);
|
||||
|
||||
int ret = -1;
|
||||
struct timeval t[3];
|
||||
float mean_texec = 0.0;
|
||||
float mean_texec_s = 0.0;
|
||||
float mean_texec_s = 0.0;
|
||||
float mean_texec_b = 0.0;
|
||||
for (int n=0;n<nof_frames;n++) {
|
||||
for (i=0;i<num_bits;i++) {
|
||||
input[i] = rand()%2;
|
||||
|
@ -186,7 +194,16 @@ int main(int argc, char **argv) {
|
|||
if (n > 0) {
|
||||
mean_texec_s = SRSLTE_VEC_CMA((float) t[0].tv_usec, mean_texec_s, n-1);
|
||||
}
|
||||
|
||||
|
||||
gettimeofday(&t[1], NULL);
|
||||
srslte_demod_soft_demodulate_b(modulation, symbols, llr_b, num_bits / mod.nbits_x_symbol);
|
||||
gettimeofday(&t[2], NULL);
|
||||
get_time_interval(t);
|
||||
|
||||
if (n > 0) {
|
||||
mean_texec_b = SRSLTE_VEC_CMA((float) t[0].tv_usec, mean_texec_b, n-1);
|
||||
}
|
||||
|
||||
if (SRSLTE_VERBOSE_ISDEBUG()) {
|
||||
printf("bits=");
|
||||
srslte_vec_fprint_b(stdout, input, num_bits);
|
||||
|
@ -200,6 +217,9 @@ int main(int argc, char **argv) {
|
|||
printf("llr_s=");
|
||||
srslte_vec_fprint_s(stdout, llr_s, num_bits);
|
||||
|
||||
printf("llr_b=");
|
||||
srslte_vec_fprint_bs(stdout, llr_b, num_bits);
|
||||
|
||||
}
|
||||
|
||||
// Check demodulation errors
|
||||
|
@ -212,7 +232,9 @@ int main(int argc, char **argv) {
|
|||
}
|
||||
ret = 0;
|
||||
|
||||
clean_exit:
|
||||
clean_exit:
|
||||
free(llr_b);
|
||||
free(llr_s);
|
||||
free(llr);
|
||||
free(symbols);
|
||||
free(output);
|
||||
|
@ -220,7 +242,7 @@ clean_exit:
|
|||
|
||||
srslte_modem_table_free(&mod);
|
||||
|
||||
printf("Mean Throughput: %.2f/%.2f. Mbps ExTime: %.2f/%.2f us\n",
|
||||
num_bits/mean_texec, num_bits/mean_texec_s, mean_texec, mean_texec_s);
|
||||
printf("Mean Throughput: %.2f/%.2f/%.2f. Mbps ExTime: %.2f/%.2f/%.2f us\n",
|
||||
num_bits/mean_texec, num_bits/mean_texec_s, num_bits/mean_texec_b, mean_texec, mean_texec_s, mean_texec_b);
|
||||
exit(ret);
|
||||
}
|
||||
|
|
|
@ -669,6 +669,50 @@ static int srslte_pdsch_codeword_encode(srslte_pdsch_t *q, srslte_pdsch_cfg_t *c
|
|||
return SRSLTE_SUCCESS;
|
||||
}
|
||||
|
||||
static void csi_correction(srslte_pdsch_t *q, srslte_pdsch_cfg_t *cfg, uint32_t codeword_idx, uint32_t tb_idx, void *e)
|
||||
{
|
||||
|
||||
srslte_ra_nbits_t *nbits = &cfg->nbits[tb_idx];
|
||||
uint32_t qm = 0;
|
||||
switch(cfg->grant.mcs[tb_idx].mod) {
|
||||
|
||||
case SRSLTE_MOD_BPSK:
|
||||
qm = 1;
|
||||
break;
|
||||
case SRSLTE_MOD_QPSK:
|
||||
qm = 2;
|
||||
break;
|
||||
case SRSLTE_MOD_16QAM:
|
||||
qm = 4;
|
||||
break;
|
||||
case SRSLTE_MOD_64QAM:
|
||||
qm = 6;
|
||||
break;
|
||||
default:
|
||||
ERROR("No modulation");
|
||||
}
|
||||
|
||||
const uint32_t csi_max_idx = srslte_vec_max_fi(q->csi[codeword_idx], nbits->nof_bits / qm);
|
||||
float csi_max = 1.0f;
|
||||
if (csi_max_idx < nbits->nof_bits / qm) {
|
||||
csi_max = q->csi[codeword_idx][csi_max_idx];
|
||||
}
|
||||
int8_t *e_b = e;
|
||||
int16_t *e_s = e;
|
||||
for (int i = 0; i < nbits->nof_bits / qm; i++) {
|
||||
const float csi = q->csi[codeword_idx][i] / csi_max;
|
||||
if (q->llr_is_8bit) {
|
||||
for (int k = 0; k < qm; k++) {
|
||||
e_b[qm * i + k] = (int8_t) ((float) e_b[qm * i + k] * csi);
|
||||
}
|
||||
} else {
|
||||
for (int k = 0; k < qm; k++) {
|
||||
e_s[qm * i + k] = (int16_t) ((float) e_s[qm * i + k] * csi);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int srslte_pdsch_codeword_decode(srslte_pdsch_t *q, srslte_pdsch_cfg_t *cfg, srslte_sch_t *dl_sch,
|
||||
srslte_softbuffer_rx_t *softbuffer, uint16_t rnti, uint8_t *data,
|
||||
uint32_t codeword_idx, uint32_t tb_idx, bool *ack) {
|
||||
|
@ -686,47 +730,24 @@ static int srslte_pdsch_codeword_decode(srslte_pdsch_t *q, srslte_pdsch_cfg_t *c
|
|||
* The MAX-log-MAP algorithm used in turbo decoding is unsensitive to SNR estimation,
|
||||
* thus we don't need tot set it in the LLRs normalization
|
||||
*/
|
||||
srslte_demod_soft_demodulate_s(mcs->mod, q->d[codeword_idx], q->e[codeword_idx], nbits->nof_re);
|
||||
if (q->llr_is_8bit) {
|
||||
srslte_demod_soft_demodulate_b(mcs->mod, q->d[codeword_idx], q->e[codeword_idx], nbits->nof_re);
|
||||
} else {
|
||||
srslte_demod_soft_demodulate_s(mcs->mod, q->d[codeword_idx], q->e[codeword_idx], nbits->nof_re);
|
||||
}
|
||||
|
||||
/* Select scrambling sequence */
|
||||
srslte_sequence_t *seq = get_user_sequence(q, rnti, codeword_idx, cfg->sf_idx, nbits->nof_bits);
|
||||
|
||||
/* Bit scrambling */
|
||||
srslte_scrambling_s_offset(seq, q->e[codeword_idx], 0, nbits->nof_bits);
|
||||
|
||||
uint32_t qm = 0;
|
||||
switch(cfg->grant.mcs[tb_idx].mod) {
|
||||
|
||||
case SRSLTE_MOD_BPSK:
|
||||
qm = 1;
|
||||
break;
|
||||
case SRSLTE_MOD_QPSK:
|
||||
qm = 2;
|
||||
break;
|
||||
case SRSLTE_MOD_16QAM:
|
||||
qm = 4;
|
||||
break;
|
||||
case SRSLTE_MOD_64QAM:
|
||||
qm = 6;
|
||||
break;
|
||||
default:
|
||||
ERROR("No modulation");
|
||||
if (q->llr_is_8bit) {
|
||||
srslte_scrambling_sb_offset(seq, q->e[codeword_idx], 0, nbits->nof_bits);
|
||||
} else {
|
||||
srslte_scrambling_s_offset(seq, q->e[codeword_idx], 0, nbits->nof_bits);
|
||||
}
|
||||
|
||||
int16_t *e = q->e[codeword_idx];
|
||||
|
||||
if (q->csi_enabled) {
|
||||
const uint32_t csi_max_idx = srslte_vec_max_fi(q->csi[codeword_idx], nbits->nof_bits / qm);
|
||||
float csi_max = 1.0f;
|
||||
if (csi_max_idx < nbits->nof_bits / qm) {
|
||||
csi_max = q->csi[codeword_idx][csi_max_idx];
|
||||
}
|
||||
for (int i = 0; i < nbits->nof_bits / qm; i++) {
|
||||
const float csi = q->csi[codeword_idx][i] / csi_max;
|
||||
for (int k = 0; k < qm; k++) {
|
||||
e[qm * i + k] = (int16_t) ((float) e[qm * i + k] * csi);
|
||||
}
|
||||
}
|
||||
csi_correction(q, cfg, codeword_idx, tb_idx, q->e[codeword_idx]);
|
||||
}
|
||||
|
||||
/* Return */
|
||||
|
|
|
@ -603,7 +603,11 @@ int srslte_pusch_decode(srslte_pusch_t *q,
|
|||
srslte_dft_precoding(&q->dft_precoding, q->z, q->d, cfg->grant.L_prb, cfg->nbits.nof_symb);
|
||||
|
||||
// Soft demodulation
|
||||
srslte_demod_soft_demodulate_s(cfg->grant.mcs.mod, q->d, q->q, cfg->nbits.nof_re);
|
||||
if (q->llr_is_8bit) {
|
||||
srslte_demod_soft_demodulate_b(cfg->grant.mcs.mod, q->d, q->q, cfg->nbits.nof_re);
|
||||
} else {
|
||||
srslte_demod_soft_demodulate_s(cfg->grant.mcs.mod, q->d, q->q, cfg->nbits.nof_re);
|
||||
}
|
||||
|
||||
// Generate scrambling sequence if not pre-generated
|
||||
srslte_sequence_t *seq = get_user_sequence(q, rnti, cfg->sf_idx, cfg->nbits.nof_bits);
|
||||
|
@ -632,7 +636,11 @@ int srslte_pusch_decode(srslte_pusch_t *q,
|
|||
}
|
||||
|
||||
// Descrambling
|
||||
srslte_scrambling_s_offset(seq, q->q, 0, cfg->nbits.nof_bits);
|
||||
if (q->llr_is_8bit) {
|
||||
srslte_scrambling_sb_offset(seq, q->q, 0, cfg->nbits.nof_bits);
|
||||
} else {
|
||||
srslte_scrambling_s_offset(seq, q->q, 0, cfg->nbits.nof_bits);
|
||||
}
|
||||
|
||||
// Decode
|
||||
ret = srslte_ulsch_uci_decode(&q->ul_sch, cfg, softbuffer, q->q, q->g, data, uci_data);
|
||||
|
|
|
@ -32,12 +32,18 @@
|
|||
#include <stdbool.h>
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <srslte/phy/phch/sch.h>
|
||||
#include "srslte/phy/phch/pdsch.h"
|
||||
#include "srslte/phy/utils/bit.h"
|
||||
#include "srslte/phy/utils/debug.h"
|
||||
#include "srslte/phy/utils/vector.h"
|
||||
|
||||
#define SRSLTE_PDSCH_MAX_TDEC_ITERS 4
|
||||
#define SRSLTE_PDSCH_MAX_TDEC_ITERS 10
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <immintrin.h>
|
||||
#endif /* LV_HAVE_SSE */
|
||||
|
||||
|
||||
/* 36.213 Table 8.6.3-1: Mapping of HARQ-ACK offset values and the index signalled by higher layers */
|
||||
float beta_harq_offset[16] = {2.0, 2.5, 3.125, 4.0, 5.0, 6.250, 8.0, 10.0,
|
||||
|
@ -184,8 +190,6 @@ static int encode_tb_off(srslte_sch_t *q,
|
|||
uint32_t Qm, uint32_t rv, uint32_t nof_e_bits,
|
||||
uint8_t *data, uint8_t *e_bits, uint32_t w_offset)
|
||||
{
|
||||
uint8_t parity[3] = {0, 0, 0};
|
||||
uint32_t par;
|
||||
uint32_t i;
|
||||
uint32_t cb_len=0, rp=0, wp=0, rlen=0, n_e=0;
|
||||
int ret = SRSLTE_ERROR_INVALID_INPUTS;
|
||||
|
@ -213,17 +217,9 @@ static int encode_tb_off(srslte_sch_t *q,
|
|||
gamma = Gp%cb_segm->C;
|
||||
}
|
||||
|
||||
if (data) {
|
||||
/* Reset TB CRC */
|
||||
srslte_crc_set_init(&q->crc_tb, 0);
|
||||
|
||||
/* Compute transport block CRC */
|
||||
par = srslte_crc_checksum_byte(&q->crc_tb, data, cb_segm->tbs);
|
||||
|
||||
/* parity bits will be appended later */
|
||||
parity[0] = (par&(0xff<<16))>>16;
|
||||
parity[1] = (par&(0xff<<8))>>8;
|
||||
parity[2] = par&0xff;
|
||||
}
|
||||
|
||||
wp = 0;
|
||||
rp = 0;
|
||||
for (i = 0; i < cb_segm->C; i++) {
|
||||
|
@ -252,6 +248,7 @@ static int encode_tb_off(srslte_sch_t *q,
|
|||
cb_len, rlen, wp, rp, n_e);
|
||||
|
||||
if (data) {
|
||||
bool last_cb = false;
|
||||
|
||||
/* Copy data to another buffer, making space for the Codeblock CRC */
|
||||
if (i < cb_segm->C - 1) {
|
||||
|
@ -263,13 +260,19 @@ static int encode_tb_off(srslte_sch_t *q,
|
|||
|
||||
/* Append Transport Block parity bits to the last CB */
|
||||
memcpy(q->cb_in, &data[rp/8], (rlen - 24) * sizeof(uint8_t)/8);
|
||||
memcpy(&q->cb_in[(rlen - 24)/8], parity, 3 * sizeof(uint8_t));
|
||||
last_cb = true;
|
||||
}
|
||||
|
||||
/* Turbo Encoding
|
||||
* If Codeblock CRC is required it is given the CRC instance pointer, otherwise CRC pointer shall be NULL
|
||||
*/
|
||||
srslte_tcod_encode_lut(&q->encoder, (cb_segm->C > 1) ? &q->crc_cb : NULL, q->cb_in, q->parity_bits, cblen_idx);
|
||||
srslte_tcod_encode_lut(&q->encoder,
|
||||
&q->crc_tb,
|
||||
(cb_segm->C > 1) ? &q->crc_cb : NULL,
|
||||
q->cb_in,
|
||||
q->parity_bits,
|
||||
cblen_idx,
|
||||
last_cb);
|
||||
}
|
||||
DEBUG("RM cblen_idx=%d, n_e=%d, wp=%d, nof_e_bits=%d\n",cblen_idx, n_e, wp, nof_e_bits);
|
||||
|
||||
|
@ -304,142 +307,117 @@ static int encode_tb(srslte_sch_t *q,
|
|||
bool decode_tb_cb(srslte_sch_t *q,
|
||||
srslte_softbuffer_rx_t *softbuffer, srslte_cbsegm_t *cb_segm,
|
||||
uint32_t Qm, uint32_t rv, uint32_t nof_e_bits,
|
||||
int16_t *e_bits, uint8_t *data,
|
||||
uint32_t cb_size_group)
|
||||
void *e_bits, uint8_t *data)
|
||||
{
|
||||
|
||||
bool cb_map[SRSLTE_MAX_CODEBLOCKS];
|
||||
|
||||
uint32_t cb_idx[SRSLTE_TDEC_MAX_NPAR];
|
||||
int16_t *decoder_input[SRSLTE_TDEC_MAX_NPAR];
|
||||
|
||||
uint32_t nof_cb = cb_size_group?cb_segm->C2:cb_segm->C1;
|
||||
uint32_t first_cb = cb_size_group?cb_segm->C1:0;
|
||||
uint32_t cb_len = cb_size_group?cb_segm->K2:cb_segm->K1;
|
||||
uint32_t cb_len_idx = cb_size_group?cb_segm->K2_idx:cb_segm->K1_idx;
|
||||
int8_t *e_bits_b = e_bits;
|
||||
int16_t *e_bits_s = e_bits;
|
||||
|
||||
uint32_t rlen = cb_segm->C==1?cb_len:(cb_len-24);
|
||||
uint32_t Gp = nof_e_bits / Qm;
|
||||
uint32_t gamma = cb_segm->C>0?Gp%cb_segm->C:Gp;
|
||||
uint32_t n_e = Qm * (Gp/cb_segm->C);
|
||||
|
||||
if (nof_cb > SRSLTE_MAX_CODEBLOCKS) {
|
||||
if (cb_segm->C > SRSLTE_MAX_CODEBLOCKS) {
|
||||
fprintf(stderr, "Error SRSLTE_MAX_CODEBLOCKS=%d\n", SRSLTE_MAX_CODEBLOCKS);
|
||||
return false;
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i=0;i<srslte_tdec_get_nof_parallel(&q->decoder);i++) {
|
||||
cb_idx[i] = i+first_cb;
|
||||
decoder_input[i] = NULL;
|
||||
}
|
||||
|
||||
uint32_t remaining_cb = 0;
|
||||
for (int i=0;i<nof_cb;i++) {
|
||||
/* Do not process blocks with CRC Ok */
|
||||
cb_map[i] = softbuffer->cb_crc[i];
|
||||
if (softbuffer->cb_crc[i] == false) {
|
||||
remaining_cb ++;
|
||||
}
|
||||
}
|
||||
|
||||
srslte_tdec_reset(&q->decoder, cb_len);
|
||||
|
||||
|
||||
q->nof_iterations = 0;
|
||||
|
||||
while(remaining_cb>0) {
|
||||
|
||||
// Unratematch the codeblocks left to decode
|
||||
for (int i=0;i<srslte_tdec_get_nof_parallel(&q->decoder);i++) {
|
||||
|
||||
if (!decoder_input[i] && remaining_cb > 0) {
|
||||
// Find an unprocessed CB
|
||||
cb_idx[i]=first_cb;
|
||||
while(cb_idx[i]<first_cb+nof_cb-1 && cb_map[cb_idx[i]]) {
|
||||
cb_idx[i]++;
|
||||
for (int cb_idx=0;cb_idx<cb_segm->C;cb_idx++)
|
||||
{
|
||||
/* Do not process blocks with CRC Ok */
|
||||
if (softbuffer->cb_crc[cb_idx] == false) {
|
||||
|
||||
uint32_t cb_len = cb_idx<cb_segm->C1?cb_segm->K1:cb_segm->K2;
|
||||
uint32_t cb_len_idx = cb_idx<cb_segm->C1?cb_segm->K1_idx:cb_segm->K2_idx;
|
||||
|
||||
uint32_t rlen = cb_segm->C==1?cb_len:(cb_len-24);
|
||||
uint32_t Gp = nof_e_bits / Qm;
|
||||
uint32_t gamma = cb_segm->C>0?Gp%cb_segm->C:Gp;
|
||||
uint32_t n_e = Qm * (Gp/cb_segm->C);
|
||||
|
||||
uint32_t rp = cb_idx*n_e;
|
||||
uint32_t n_e2 = n_e;
|
||||
|
||||
if (cb_idx > cb_segm->C - gamma) {
|
||||
n_e2 = n_e+Qm;
|
||||
rp = (cb_segm->C - gamma)*n_e + (cb_idx-(cb_segm->C - gamma))*n_e2;
|
||||
}
|
||||
|
||||
if (q->llr_is_8bit) {
|
||||
if (srslte_rm_turbo_rx_lut_8bit(&e_bits_b[rp], (int8_t*) softbuffer->buffer_f[cb_idx], n_e2, cb_len_idx, rv)) {
|
||||
fprintf(stderr, "Error in rate matching\n");
|
||||
return SRSLTE_ERROR;
|
||||
}
|
||||
if (cb_map[cb_idx[i]] == false) {
|
||||
cb_map[cb_idx[i]] = true;
|
||||
|
||||
uint32_t rp = cb_idx[i]*n_e;
|
||||
uint32_t n_e2 = n_e;
|
||||
|
||||
if (cb_idx[i] > cb_segm->C - gamma) {
|
||||
n_e2 = n_e+Qm;
|
||||
rp = (cb_segm->C - gamma)*n_e + (cb_idx[i]-(cb_segm->C - gamma))*n_e2;
|
||||
}
|
||||
|
||||
INFO("CB %d: rp=%d, n_e=%d, i=%d\n", cb_idx[i], rp, n_e2, i);
|
||||
if (srslte_rm_turbo_rx_lut(&e_bits[rp], softbuffer->buffer_f[cb_idx[i]], n_e2, cb_len_idx, rv)) {
|
||||
fprintf(stderr, "Error in rate matching\n");
|
||||
return SRSLTE_ERROR;
|
||||
}
|
||||
|
||||
decoder_input[i] = softbuffer->buffer_f[cb_idx[i]];
|
||||
} else {
|
||||
if (srslte_rm_turbo_rx_lut(&e_bits_s[rp], softbuffer->buffer_f[cb_idx], n_e2, cb_len_idx, rv)) {
|
||||
fprintf(stderr, "Error in rate matching\n");
|
||||
return SRSLTE_ERROR;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Run 1 iteration for the codeblocks in queue
|
||||
srslte_tdec_iteration_par(&q->decoder, decoder_input, cb_len);
|
||||
|
||||
// Decide output bits and compute CRC
|
||||
for (int i=0;i<srslte_tdec_get_nof_parallel(&q->decoder);i++) {
|
||||
if (decoder_input[i]) {
|
||||
srslte_tdec_decision_byte_par_cb(&q->decoder, q->cb_in, i, cb_len);
|
||||
srslte_tdec_new_cb(&q->decoder, cb_len);
|
||||
|
||||
uint32_t len_crc;
|
||||
srslte_crc_t *crc_ptr;
|
||||
|
||||
if (cb_segm->C > 1) {
|
||||
len_crc = cb_len;
|
||||
crc_ptr = &q->crc_cb;
|
||||
// Run iterations and use CRC for early stopping
|
||||
bool early_stop = false;
|
||||
uint32_t cb_noi = 0;
|
||||
do {
|
||||
if (q->llr_is_8bit) {
|
||||
srslte_tdec_iteration_8bit(&q->decoder, (int8_t*) softbuffer->buffer_f[cb_idx], &data[cb_idx*rlen/8]);
|
||||
} else {
|
||||
len_crc = cb_segm->tbs+24;
|
||||
crc_ptr = &q->crc_tb;
|
||||
srslte_tdec_iteration(&q->decoder, softbuffer->buffer_f[cb_idx], &data[cb_idx*rlen/8]);
|
||||
}
|
||||
q->nof_iterations++;
|
||||
cb_noi++;
|
||||
|
||||
uint32_t len_crc;
|
||||
srslte_crc_t *crc_ptr;
|
||||
|
||||
if (cb_segm->C > 1) {
|
||||
len_crc = cb_len;
|
||||
crc_ptr = &q->crc_cb;
|
||||
} else {
|
||||
len_crc = cb_segm->tbs+24;
|
||||
crc_ptr = &q->crc_tb;
|
||||
}
|
||||
|
||||
// CRC is OK
|
||||
if (!srslte_crc_checksum_byte(crc_ptr, q->cb_in, len_crc)) {
|
||||
if (!srslte_crc_checksum_byte(crc_ptr, &data[cb_idx*rlen/8], len_crc)) {
|
||||
|
||||
memcpy(softbuffer->data[cb_idx[i]], q->cb_in, rlen/8 * sizeof(uint8_t));
|
||||
softbuffer->cb_crc[cb_idx[i]] = true;
|
||||
softbuffer->cb_crc[cb_idx] = true;
|
||||
early_stop = true;
|
||||
|
||||
q->nof_iterations += srslte_tdec_get_nof_iterations_cb(&q->decoder, i);
|
||||
|
||||
// Reset number of iterations for that CB in the decoder
|
||||
srslte_tdec_reset_cb(&q->decoder, i);
|
||||
remaining_cb--;
|
||||
decoder_input[i] = NULL;
|
||||
cb_idx[i] = 0;
|
||||
|
||||
// CRC is error and exceeded maximum iterations for this CB.
|
||||
// Early stop the whole transport block.
|
||||
} else if (srslte_tdec_get_nof_iterations_cb(&q->decoder, i) >= q->max_iterations) {
|
||||
INFO("CB %d: Error. CB is erroneous. remaining_cb=%d, i=%d, first_cb=%d, nof_cb=%d\n",
|
||||
cb_idx[i], remaining_cb, i, first_cb, nof_cb);
|
||||
|
||||
q->nof_iterations += q->max_iterations;
|
||||
srslte_tdec_reset_cb(&q->decoder, i);
|
||||
remaining_cb--;
|
||||
decoder_input[i] = NULL;
|
||||
cb_idx[i] = 0;
|
||||
// CRC is error and exceeded maximum iterations for this CB.
|
||||
// Early stop the whole transport block.
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
softbuffer->tb_crc = true;
|
||||
for (int i = 0; i < nof_cb && softbuffer->tb_crc; i++) {
|
||||
/* If one CB failed return false */
|
||||
softbuffer->tb_crc = softbuffer->cb_crc[i];
|
||||
}
|
||||
if (softbuffer->tb_crc) {
|
||||
for (int i = 0; i < nof_cb; i++) {
|
||||
memcpy(&data[i * rlen / 8], softbuffer->data[i], rlen/8 * sizeof(uint8_t));
|
||||
} while (cb_noi < q->max_iterations && !early_stop);
|
||||
|
||||
INFO("CB %d: rp=%d, n_e=%d, cb_len=%d, CRC=%s, rlen=%d, iterations=%d/%d\n",
|
||||
cb_idx, rp, n_e2, cb_len, early_stop?"OK":"KO", rlen, cb_noi, q->max_iterations);
|
||||
|
||||
} else {
|
||||
// Copy decoded data from previous transmissions
|
||||
uint32_t cb_len = cb_idx<cb_segm->C1?cb_segm->K1:cb_segm->K2;
|
||||
uint32_t rlen = cb_segm->C==1?cb_len:(cb_len-24);
|
||||
memcpy(&data[cb_idx*rlen/8], softbuffer->data[cb_idx], rlen/8 * sizeof(uint8_t));
|
||||
}
|
||||
}
|
||||
|
||||
q->nof_iterations /= nof_cb;
|
||||
softbuffer->tb_crc = true;
|
||||
for (int i = 0; i < cb_segm->C && softbuffer->tb_crc; i++) {
|
||||
/* If one CB failed return false */
|
||||
softbuffer->tb_crc = softbuffer->cb_crc[i];
|
||||
}
|
||||
// If TB CRC failed, save correct CB for next retransmission
|
||||
if (!softbuffer->tb_crc) {
|
||||
for (int i = 0; i < cb_segm->C; i++) {
|
||||
if (softbuffer->cb_crc[i]) {
|
||||
uint32_t cb_len = i<cb_segm->C1?cb_segm->K1:cb_segm->K2;
|
||||
uint32_t rlen = cb_segm->C==1?cb_len:(cb_len-24);
|
||||
memcpy(softbuffer->data[i], &data[i * rlen / 8], rlen/8 * sizeof(uint8_t));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
q->nof_iterations /= cb_segm->C;
|
||||
return softbuffer->tb_crc;
|
||||
}
|
||||
|
||||
|
@ -484,18 +462,14 @@ static int decode_tb(srslte_sch_t *q,
|
|||
}
|
||||
|
||||
bool crc_ok = true;
|
||||
|
||||
uint32_t nof_cb_groups = cb_segm->C2>0?2:1;
|
||||
|
||||
|
||||
data[cb_segm->tbs/8+0] = 0;
|
||||
data[cb_segm->tbs/8+1] = 0;
|
||||
data[cb_segm->tbs/8+2] = 0;
|
||||
|
||||
// Process Codeblocks in groups of equal CB size to parallelize according to SRSLTE_TDEC_MAX_NPAR
|
||||
for (uint32_t i=0;i<nof_cb_groups && crc_ok;i++) {
|
||||
crc_ok = decode_tb_cb(q, softbuffer, cb_segm, Qm, rv, nof_e_bits, e_bits, data, i);
|
||||
}
|
||||
|
||||
// Process Codeblocks
|
||||
crc_ok = decode_tb_cb(q, softbuffer, cb_segm, Qm, rv, nof_e_bits, e_bits, data);
|
||||
|
||||
if (crc_ok) {
|
||||
|
||||
uint32_t par_rx = 0, par_tx = 0;
|
||||
|
@ -593,23 +567,287 @@ static void ulsch_interleave_gen(uint32_t H_prime_total, uint32_t N_pusch_symbs,
|
|||
}
|
||||
}
|
||||
|
||||
static void ulsch_interleave_qm2(const uint8_t *g_bits, uint32_t rows, uint32_t cols, uint8_t *q_bits, uint32_t ri_min_row, const uint8_t *ri_present) {
|
||||
uint32_t bit_read_idx = 0;
|
||||
|
||||
for (uint32_t j = 0; j < ri_min_row; j++) {
|
||||
for (uint32_t i = 0; i < cols; i++) {
|
||||
uint32_t k = (i * rows + j) * 2;
|
||||
|
||||
uint32_t read_byte_idx = bit_read_idx / 8;
|
||||
uint32_t read_bit_idx = bit_read_idx % 8;
|
||||
uint32_t write_byte_idx = k / 8;
|
||||
uint32_t write_bit_idx = k % 8;
|
||||
uint8_t w = (g_bits[read_byte_idx] >> (6 - read_bit_idx)) & (uint8_t) 0x03;
|
||||
q_bits[write_byte_idx] |= w << (6 - write_bit_idx);
|
||||
|
||||
bit_read_idx += 2;
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t j = ri_min_row; j < rows; j++) {
|
||||
for (uint32_t i = 0; i < cols; i++) {
|
||||
uint32_t k = (i * rows + j) * 2;
|
||||
|
||||
if (ri_present[k]) {
|
||||
/* do nothing */
|
||||
} else {
|
||||
uint32_t read_byte_idx = bit_read_idx / 8;
|
||||
uint32_t read_bit_idx = bit_read_idx % 8;
|
||||
uint32_t write_byte_idx = k / 8;
|
||||
uint32_t write_bit_idx = k % 8;
|
||||
uint8_t w = (g_bits[read_byte_idx] >> (6 - read_bit_idx)) & (uint8_t) 0x03;
|
||||
q_bits[write_byte_idx] |= w << (6 - write_bit_idx);
|
||||
|
||||
bit_read_idx += 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void ulsch_interleave_qm4(uint8_t *g_bits, uint32_t rows, uint32_t cols, uint8_t *q_bits, uint32_t ri_min_row, const uint8_t *ri_present) {
|
||||
uint32_t bit_read_idx = 0;
|
||||
|
||||
for (uint32_t j = 0; j < ri_min_row; j++) {
|
||||
int32_t i = 0;
|
||||
|
||||
#ifndef LV_HAVE_SSE
|
||||
__m128i _counter = _mm_slli_epi32(_mm_add_epi32(_mm_mullo_epi32(_counter0,_rows),_mm_set1_epi32(j)), 2);
|
||||
uint8_t *_g_bits = &g_bits[bit_read_idx/8];
|
||||
|
||||
/* First bits are aligned to byte */
|
||||
if (0 == (bit_read_idx & 0x3)) {
|
||||
for (; i < (cols - 3); i += 4) {
|
||||
|
||||
uint8_t w1 = *(_g_bits++);
|
||||
uint8_t w2 = *(_g_bits++);
|
||||
|
||||
__m128i _write_byte_idx = _mm_srli_epi32(_counter, 3);
|
||||
__m128i _write_bit_idx = _mm_and_si128(_counter, _7);
|
||||
__m128i _write_shift = _mm_sub_epi32(_4, _write_bit_idx);
|
||||
|
||||
q_bits[_mm_extract_epi32(_write_byte_idx, 0)] |= (w1 >> 0x4) << _mm_extract_epi32(_write_shift, 0);
|
||||
q_bits[_mm_extract_epi32(_write_byte_idx, 1)] |= (w1 & 0xf) << _mm_extract_epi32(_write_shift, 1);
|
||||
q_bits[_mm_extract_epi32(_write_byte_idx, 2)] |= (w2 >> 0x4) << _mm_extract_epi32(_write_shift, 2);
|
||||
q_bits[_mm_extract_epi32(_write_byte_idx, 3)] |= (w2 & 0xf) << _mm_extract_epi32(_write_shift, 3);
|
||||
_counter = _mm_add_epi32(_counter, _inc);
|
||||
}
|
||||
} else {
|
||||
for (; i < (cols - 3); i += 4) {
|
||||
__m128i _write_byte_idx = _mm_srli_epi32(_counter, 3);
|
||||
__m128i _write_bit_idx = _mm_and_si128(_counter, _7);
|
||||
__m128i _write_shift = _mm_sub_epi32(_4, _write_bit_idx);
|
||||
|
||||
uint8_t w1 = *(_g_bits);
|
||||
uint8_t w2 = *(_g_bits++);
|
||||
uint8_t w3 = *(_g_bits++);
|
||||
q_bits[_mm_extract_epi32(_write_byte_idx, 0)] |= (w1 & 0xf) << _mm_extract_epi32(_write_shift, 0);
|
||||
q_bits[_mm_extract_epi32(_write_byte_idx, 1)] |= (w2 >> 0x4) << _mm_extract_epi32(_write_shift, 1);
|
||||
q_bits[_mm_extract_epi32(_write_byte_idx, 2)] |= (w2 & 0xf) << _mm_extract_epi32(_write_shift, 2);
|
||||
q_bits[_mm_extract_epi32(_write_byte_idx, 3)] |= (w3 >> 0x4) << _mm_extract_epi32(_write_shift, 3);
|
||||
|
||||
_counter = _mm_add_epi32(_counter, _inc);
|
||||
}
|
||||
}
|
||||
bit_read_idx += i * 4;
|
||||
#endif /* LV_HAVE_SSE */
|
||||
|
||||
/* Spare bits */
|
||||
for (; i < cols; i++) {
|
||||
uint32_t k = (i * rows + j) * 4;
|
||||
|
||||
uint32_t read_byte_idx = bit_read_idx / 8;
|
||||
uint32_t read_bit_idx = bit_read_idx % 8;
|
||||
uint32_t write_byte_idx = k / 8;
|
||||
uint32_t write_bit_idx = k % 8;
|
||||
uint8_t w = (g_bits[read_byte_idx] >> (4 - read_bit_idx)) & (uint8_t) 0x0f;
|
||||
q_bits[write_byte_idx] |= w << (4 - write_bit_idx);
|
||||
|
||||
bit_read_idx += 4;
|
||||
}
|
||||
}
|
||||
|
||||
/* Do rows containing RI */
|
||||
for (uint32_t j = ri_min_row; j < rows; j++) {
|
||||
for (uint32_t i = 0; i < cols; i++) {
|
||||
uint32_t k = (i * rows + j) * 4;
|
||||
|
||||
if (ri_present[k]) {
|
||||
/* do nothing */
|
||||
} else {
|
||||
uint32_t read_byte_idx = bit_read_idx / 8;
|
||||
uint32_t read_bit_idx = bit_read_idx % 8;
|
||||
uint32_t write_byte_idx = k / 8;
|
||||
uint32_t write_bit_idx = k % 8;
|
||||
uint8_t w = (g_bits[read_byte_idx] >> (4 - read_bit_idx)) & (uint8_t) 0x0f;
|
||||
q_bits[write_byte_idx] |= w << (4 - write_bit_idx);
|
||||
|
||||
bit_read_idx += 4;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
static void ulsch_interleave_qm6(const uint8_t *g_bits,
|
||||
uint32_t rows,
|
||||
uint32_t cols,
|
||||
uint8_t *q_bits,
|
||||
uint32_t ri_min_row,
|
||||
const uint8_t *ri_present) {
|
||||
uint32_t bit_read_idx = 0;
|
||||
|
||||
for (uint32_t j = 0; j < ri_min_row; j++) {
|
||||
for (uint32_t i = 0; i < cols; i++) {
|
||||
uint32_t k = (i * rows + j) * 6;
|
||||
|
||||
uint32_t read_byte_idx = bit_read_idx / 8;
|
||||
uint32_t read_bit_idx = bit_read_idx % 8;
|
||||
uint32_t write_byte_idx = k / 8;
|
||||
uint32_t write_bit_idx = k % 8;
|
||||
uint8_t w;
|
||||
|
||||
switch (read_bit_idx) {
|
||||
case 0:
|
||||
w = g_bits[read_byte_idx] >> 2;
|
||||
break;
|
||||
case 2:
|
||||
w = g_bits[read_byte_idx] & (uint8_t) 0x3f;
|
||||
break;
|
||||
case 4:
|
||||
w = ((g_bits[read_byte_idx] << 2) | (g_bits[read_byte_idx + 1] >> 6)) & (uint8_t) 0x3f;
|
||||
break;
|
||||
case 6:
|
||||
w = ((g_bits[read_byte_idx] << 4) | (g_bits[read_byte_idx + 1] >> 4)) & (uint8_t) 0x3f;
|
||||
break;
|
||||
default:
|
||||
w = 0;
|
||||
}
|
||||
|
||||
switch (write_bit_idx) {
|
||||
case 0:
|
||||
q_bits[write_byte_idx] |= w << 2;
|
||||
break;
|
||||
case 2:
|
||||
q_bits[write_byte_idx] |= w;
|
||||
break;
|
||||
case 4:
|
||||
q_bits[write_byte_idx] |= w >> 2;
|
||||
q_bits[write_byte_idx + 1] |= w << 6;
|
||||
break;
|
||||
case 6:
|
||||
q_bits[write_byte_idx] |= w >> 4;
|
||||
q_bits[write_byte_idx + 1] |= w << 4;
|
||||
break;
|
||||
default:
|
||||
/* Do nothing */;
|
||||
}
|
||||
|
||||
bit_read_idx += 6;
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t j = ri_min_row; j < rows; j++) {
|
||||
for (uint32_t i = 0; i < cols; i++) {
|
||||
uint32_t k = (i * rows + j) * 6;
|
||||
|
||||
if (ri_present[k]) {
|
||||
/* do nothing */
|
||||
} else {
|
||||
uint32_t read_byte_idx = bit_read_idx / 8;
|
||||
uint32_t read_bit_idx = bit_read_idx % 8;
|
||||
uint32_t write_byte_idx = k / 8;
|
||||
uint32_t write_bit_idx = k % 8;
|
||||
uint8_t w;
|
||||
|
||||
switch (read_bit_idx) {
|
||||
case 0:
|
||||
w = g_bits[read_byte_idx] >> 2;
|
||||
break;
|
||||
case 2:
|
||||
w = g_bits[read_byte_idx] & (uint8_t) 0x3f;
|
||||
break;
|
||||
case 4:
|
||||
w = ((g_bits[read_byte_idx] << 2) | (g_bits[read_byte_idx + 1] >> 6)) & (uint8_t) 0x3f;
|
||||
break;
|
||||
case 6:
|
||||
w = ((g_bits[read_byte_idx] << 4) | (g_bits[read_byte_idx + 1] >> 4)) & (uint8_t) 0x3f;
|
||||
break;
|
||||
default:
|
||||
w = 0;
|
||||
}
|
||||
|
||||
switch (write_bit_idx) {
|
||||
case 0:
|
||||
q_bits[write_byte_idx] |= w << 2;
|
||||
break;
|
||||
case 2:
|
||||
q_bits[write_byte_idx] |= w;
|
||||
break;
|
||||
case 4:
|
||||
q_bits[write_byte_idx] |= w >> 2;
|
||||
q_bits[write_byte_idx + 1] |= w << 6;
|
||||
break;
|
||||
case 6:
|
||||
q_bits[write_byte_idx] |= w >> 4;
|
||||
q_bits[write_byte_idx + 1] |= w << 4;
|
||||
break;
|
||||
default:
|
||||
/* Do nothing */;
|
||||
}
|
||||
|
||||
bit_read_idx += 6;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* UL-SCH channel interleaver according to 5.2.2.8 of 36.212 */
|
||||
void ulsch_interleave(uint8_t *g_bits, uint32_t Qm, uint32_t H_prime_total,
|
||||
uint32_t N_pusch_symbs, uint8_t *q_bits, srslte_uci_bit_t *ri_bits, uint32_t nof_ri_bits,
|
||||
uint8_t *ri_present, uint32_t *inteleaver_lut)
|
||||
{
|
||||
|
||||
|
||||
const uint32_t nof_bits = H_prime_total * Qm;
|
||||
uint32_t rows = H_prime_total / N_pusch_symbs;
|
||||
uint32_t cols = N_pusch_symbs;
|
||||
uint32_t ri_min_row = rows;
|
||||
|
||||
// Prepare ri_bits for fast search using temp_buffer
|
||||
if (nof_ri_bits > 0) {
|
||||
for (uint32_t i=0;i<nof_ri_bits;i++) {
|
||||
ri_present[ri_bits[i].position] = 1;
|
||||
uint32_t ri_row = (ri_bits[i].position / Qm) % rows;
|
||||
|
||||
if (ri_row < ri_min_row) {
|
||||
ri_min_row = ri_row;
|
||||
}
|
||||
|
||||
ri_present[ri_bits[i].position] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#if 1
|
||||
bzero(q_bits, nof_bits / 8);
|
||||
switch (Qm) {
|
||||
case 2:
|
||||
ulsch_interleave_qm2(g_bits, rows, cols, q_bits, ri_min_row, ri_present);
|
||||
break;
|
||||
case 4:
|
||||
ulsch_interleave_qm4(g_bits, rows, cols, q_bits, ri_min_row, ri_present);
|
||||
break;
|
||||
case 6:
|
||||
ulsch_interleave_qm6(g_bits, rows, cols, q_bits, ri_min_row, ri_present);
|
||||
break;
|
||||
default:
|
||||
/* This line should never be reached */
|
||||
fprintf(stderr, "Wrong Qm (%d)\n", Qm);
|
||||
}
|
||||
#else
|
||||
// Genearate interleaver table and interleave bits
|
||||
ulsch_interleave_gen(H_prime_total, N_pusch_symbs, Qm, ri_present, inteleaver_lut);
|
||||
srslte_bit_interleave_i(g_bits, q_bits, inteleaver_lut, H_prime_total*Qm);
|
||||
|
||||
#endif
|
||||
|
||||
// Reset temp_buffer because will be reused next time
|
||||
if (nof_ri_bits > 0) {
|
||||
for (uint32_t i=0;i<nof_ri_bits;i++) {
|
||||
|
|
|
@ -62,19 +62,24 @@ uint32_t nof_rx_antennas = 1;
|
|||
bool tb_cw_swap = false;
|
||||
bool enable_coworker = false;
|
||||
uint32_t pmi = 0;
|
||||
char *input_file = NULL;
|
||||
char *input_file = NULL;
|
||||
int M=1;
|
||||
|
||||
bool use_8_bit = false;
|
||||
|
||||
void usage(char *prog) {
|
||||
printf("Usage: %s [fmMcsrtRFpnwav] \n", prog);
|
||||
printf("Usage: %s [fmMbcsrtRFpnwav] \n", prog);
|
||||
printf("\t-f read signal from file [Default generate it with pdsch_encode()]\n");
|
||||
printf("\t-m MCS [Default %d]\n", mcs[0]);
|
||||
printf("\t-M MCS2 [Default %d]\n", mcs[1]);
|
||||
printf("\t-c cell id [Default %d]\n", cell.id);
|
||||
printf("\t-b Use 8-bit LLR [Default 16-bit]\n");
|
||||
printf("\t-s subframe [Default %d]\n", subframe);
|
||||
printf("\t-r rv_idx [Default %d]\n", rv_idx[0]);
|
||||
printf("\t-t rv_idx2 [Default %d]\n", rv_idx[1]);
|
||||
printf("\t-R rnti [Default %d]\n", rnti);
|
||||
printf("\t-F cfi [Default %d]\n", cfi);
|
||||
printf("\t-X Number of repetitions for time measurement [Default %d]\n", M);
|
||||
printf("\t-x Transmission mode [single|diversity|cdd|multiplex] [Default %s]\n", mimo_type_str);
|
||||
printf("\t-n cell.nof_prb [Default %d]\n", cell.nof_prb);
|
||||
printf("\t-a nof_rx_antennas [Default %d]\n", nof_rx_antennas);
|
||||
|
@ -86,7 +91,7 @@ void usage(char *prog) {
|
|||
|
||||
void parse_args(int argc, char **argv) {
|
||||
int opt;
|
||||
while ((opt = getopt(argc, argv, "fmMcsrtRFpnawvxj")) != -1) {
|
||||
while ((opt = getopt(argc, argv, "fmMcsbrtRFpnawvXxj")) != -1) {
|
||||
switch(opt) {
|
||||
case 'f':
|
||||
input_file = argv[optind];
|
||||
|
@ -94,12 +99,18 @@ void parse_args(int argc, char **argv) {
|
|||
case 'm':
|
||||
mcs[0] = (uint32_t) atoi(argv[optind]);
|
||||
break;
|
||||
case 'b':
|
||||
use_8_bit = true;
|
||||
break;
|
||||
case 'M':
|
||||
mcs[1] = (uint32_t) atoi(argv[optind]);
|
||||
break;
|
||||
case 's':
|
||||
subframe = atoi(argv[optind]);
|
||||
break;
|
||||
case 'X':
|
||||
M = (uint32_t) atoi(argv[optind]);
|
||||
break;
|
||||
case 'r':
|
||||
rv_idx[0] = (uint32_t) atoi(argv[optind]);
|
||||
break;
|
||||
|
@ -166,7 +177,6 @@ int main(int argc, char **argv) {
|
|||
int ret = -1;
|
||||
struct timeval t[3];
|
||||
srslte_softbuffer_tx_t *softbuffers_tx[SRSLTE_MAX_CODEWORDS];
|
||||
int M=1;
|
||||
bool acks[SRSLTE_MAX_CODEWORDS] = {false};
|
||||
|
||||
parse_args(argc,argv);
|
||||
|
@ -319,6 +329,9 @@ int main(int argc, char **argv) {
|
|||
goto quit;
|
||||
}
|
||||
|
||||
pdsch_rx.llr_is_8bit = use_8_bit;
|
||||
pdsch_rx.dl_sch.llr_is_8bit = use_8_bit;
|
||||
|
||||
srslte_pdsch_set_rnti(&pdsch_rx, rnti);
|
||||
|
||||
for (i = 0; i < SRSLTE_MAX_CODEWORDS; i++) {
|
||||
|
@ -517,8 +530,8 @@ int main(int argc, char **argv) {
|
|||
for (int byte = 0; byte < grant.mcs[tb].tbs / 8; byte++) {
|
||||
if (data_tx[tb][byte] != data_rx[tb][byte]) {
|
||||
ERROR("Found BYTE (%d) error in TB %d (%02X != %02X), quiting...", byte, tb, data_tx[tb][byte], data_rx[tb][byte]);
|
||||
printf("Tx: "); srslte_vec_fprint_byte(stdout, data_tx[tb], grant.mcs[tb].tbs / 8);
|
||||
printf("Rx: "); srslte_vec_fprint_byte(stdout, data_rx[tb], grant.mcs[tb].tbs / 8);
|
||||
//printf("Tx: "); srslte_vec_fprint_byte(stdout, data_tx[tb], grant.mcs[tb].tbs / 8);
|
||||
//printf("Rx: "); srslte_vec_fprint_byte(stdout, data_rx[tb], grant.mcs[tb].tbs / 8);
|
||||
ret = SRSLTE_ERROR;
|
||||
goto quit;
|
||||
}
|
||||
|
|
|
@ -47,7 +47,12 @@ void srslte_scrambling_s(srslte_sequence_t *s, short *data) {
|
|||
|
||||
void srslte_scrambling_s_offset(srslte_sequence_t *s, short *data, int offset, int len) {
|
||||
assert (len + offset <= s->cur_len);
|
||||
srslte_vec_prod_sss(data, &s->c_short[offset], data, len);
|
||||
srslte_vec_neg_sss(data, &s->c_short[offset], data, len);
|
||||
}
|
||||
|
||||
void srslte_scrambling_sb_offset(srslte_sequence_t *s, int8_t *data, int offset, int len) {
|
||||
assert (len + offset <= s->cur_len);
|
||||
srslte_vec_neg_bbb(data, &s->c_char[offset], data, len);
|
||||
}
|
||||
|
||||
void srslte_scrambling_c(srslte_sequence_t *s, cf_t *data) {
|
||||
|
|
|
@ -177,6 +177,83 @@ int main(int argc, char **argv) {
|
|||
|
||||
free(input_f);
|
||||
free(scrambled_f);
|
||||
|
||||
int16_t *input_s, *scrambled_s;
|
||||
|
||||
// Scramble also shorts
|
||||
input_s= malloc(sizeof(int16_t) * seq.cur_len);
|
||||
if (!input_s) {
|
||||
perror("malloc");
|
||||
exit(-1);
|
||||
}
|
||||
scrambled_s = malloc(sizeof(int16_t) * seq.cur_len);
|
||||
if (!scrambled_s) {
|
||||
perror("malloc");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
for (i=0;i<seq.cur_len;i++) {
|
||||
input_s[i] = 100*(rand()/RAND_MAX)-50;
|
||||
scrambled_s[i] = input_s[i];
|
||||
}
|
||||
|
||||
gettimeofday(&t[1], NULL);
|
||||
srslte_scrambling_s(&seq, scrambled_s);
|
||||
gettimeofday(&t[2], NULL);
|
||||
srslte_scrambling_s(&seq, scrambled_s);
|
||||
|
||||
|
||||
get_time_interval(t);
|
||||
printf("Texec short=%ld us for %d bits\n", t[0].tv_usec, seq.cur_len);
|
||||
|
||||
for (i=0;i<seq.cur_len;i++) {
|
||||
if (scrambled_s[i] != input_s[i]) {
|
||||
printf("Error in %d\n", i);
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
free(input_s);
|
||||
free(scrambled_s);
|
||||
|
||||
|
||||
int8_t *input_b, *scrambled_b;
|
||||
|
||||
// Scramble also bytes
|
||||
input_b= malloc(sizeof(int8_t) * seq.cur_len);
|
||||
if (!input_b) {
|
||||
perror("malloc");
|
||||
exit(-1);
|
||||
}
|
||||
scrambled_b = malloc(sizeof(int8_t) * seq.cur_len);
|
||||
if (!scrambled_b) {
|
||||
perror("malloc");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
for (i=0;i<seq.cur_len;i++) {
|
||||
input_b[i] = 100*(rand()/RAND_MAX)-50;
|
||||
scrambled_b[i] = input_b[i];
|
||||
}
|
||||
|
||||
gettimeofday(&t[1], NULL);
|
||||
srslte_scrambling_sb_offset(&seq, scrambled_b, 0, seq.cur_len);
|
||||
gettimeofday(&t[2], NULL);
|
||||
srslte_scrambling_sb_offset(&seq, scrambled_b, 0, seq.cur_len);
|
||||
|
||||
|
||||
get_time_interval(t);
|
||||
printf("Texec char=%ld us for %d bits\n", t[0].tv_usec, seq.cur_len);
|
||||
|
||||
for (i=0;i<seq.cur_len;i++) {
|
||||
if (scrambled_b[i] != input_b[i]) {
|
||||
printf("Error in %d\n", i);
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
free(input_b);
|
||||
free(scrambled_b);
|
||||
}
|
||||
printf("Ok\n");
|
||||
srslte_sequence_free(&seq);
|
||||
|
|
|
@ -58,7 +58,11 @@ void srslte_vec_sub_sss(const int16_t *x, const int16_t *y, int16_t *z, const ui
|
|||
srslte_vec_sub_sss_simd(x, y, z, len);
|
||||
}
|
||||
|
||||
// Noise estimation in chest_dl, interpolation
|
||||
void srslte_vec_sub_bbb(const int8_t *x, const int8_t *y, int8_t *z, const uint32_t len) {
|
||||
srslte_vec_sub_bbb_simd(x, y, z, len);
|
||||
}
|
||||
|
||||
// Noise estimation in chest_dl, interpolation
|
||||
void srslte_vec_sub_ccc(const cf_t *x, const cf_t *y, cf_t *z, const uint32_t len) {
|
||||
return srslte_vec_sub_fff((const float*) x,(const float*) y,(float*) z, 2*len);
|
||||
}
|
||||
|
@ -100,10 +104,18 @@ void srslte_vec_convert_fi(const float *x, const float scale, int16_t *z, const
|
|||
srslte_vec_convert_fi_simd(x, z, scale, len);
|
||||
}
|
||||
|
||||
void srslte_vec_convert_fb(const float *x, const float scale, int8_t *z, const uint32_t len) {
|
||||
srslte_vec_convert_fb_simd(x, z, scale, len);
|
||||
}
|
||||
|
||||
void srslte_vec_lut_sss(const short *x, const unsigned short *lut, short *y, const uint32_t len) {
|
||||
srslte_vec_lut_sss_simd(x, lut, y, len);
|
||||
}
|
||||
|
||||
void srslte_vec_lut_bbb(const int8_t *x, const unsigned short *lut, int8_t *y, const uint32_t len) {
|
||||
srslte_vec_lut_bbb_simd(x, lut, y, len);
|
||||
}
|
||||
|
||||
void srslte_vec_lut_sis(const short *x, const unsigned int *lut, short *y, const uint32_t len) {
|
||||
for (int i=0; i < len; i++) {
|
||||
y[lut[i]] = x[i];
|
||||
|
@ -163,6 +175,15 @@ void srslte_vec_fprint_b(FILE *stream, uint8_t *x, const uint32_t len) {
|
|||
fprintf(stream, "];\n");
|
||||
}
|
||||
|
||||
void srslte_vec_fprint_bs(FILE *stream, int8_t *x, const uint32_t len) {
|
||||
int i;
|
||||
fprintf(stream, "[");
|
||||
for (i=0;i<len;i++) {
|
||||
fprintf(stream, "%4d, ", x[i]);
|
||||
}
|
||||
fprintf(stream, "];\n");
|
||||
}
|
||||
|
||||
void srslte_vec_fprint_byte(FILE *stream, uint8_t *x, const uint32_t len) {
|
||||
int i;
|
||||
fprintf(stream, "[");
|
||||
|
@ -185,7 +206,7 @@ void srslte_vec_fprint_s(FILE *stream, short *x, const uint32_t len) {
|
|||
int i;
|
||||
fprintf(stream, "[");
|
||||
for (i=0;i<len;i++) {
|
||||
fprintf(stream, "%d, ", x[i]);
|
||||
fprintf(stream, "%4d, ", x[i]);
|
||||
}
|
||||
fprintf(stream, "];\n");
|
||||
}
|
||||
|
@ -271,11 +292,18 @@ void srslte_vec_prod_fff(const float *x, const float *y, float *z, const uint32_
|
|||
srslte_vec_prod_fff_simd(x, y, z, len);
|
||||
}
|
||||
|
||||
// Scrambling Short
|
||||
void srslte_vec_prod_sss(const int16_t *x, const int16_t *y, int16_t *z, const uint32_t len) {
|
||||
srslte_vec_prod_sss_simd(x,y,z,len);
|
||||
}
|
||||
|
||||
// Scrambling
|
||||
void srslte_vec_neg_sss(const int16_t *x, const int16_t *y, int16_t *z, const uint32_t len) {
|
||||
srslte_vec_neg_sss_simd(x,y,z,len);
|
||||
}
|
||||
void srslte_vec_neg_bbb(const int8_t *x, const int8_t *y, int8_t *z, const uint32_t len) {
|
||||
srslte_vec_neg_bbb_simd(x,y,z,len);
|
||||
}
|
||||
|
||||
// CFO and OFDM processing
|
||||
void srslte_vec_prod_ccc(const cf_t *x, const cf_t *y, cf_t *z, const uint32_t len) {
|
||||
srslte_vec_prod_ccc_simd(x,y,z,len);
|
||||
|
|
|
@ -162,6 +162,35 @@ void srslte_vec_sub_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, con
|
|||
}
|
||||
}
|
||||
|
||||
void srslte_vec_sub_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const int len) {
|
||||
int i = 0;
|
||||
#if SRSLTE_SIMD_B_SIZE
|
||||
if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) {
|
||||
for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) {
|
||||
simd_s_t a = srslte_simd_b_load(&x[i]);
|
||||
simd_s_t b = srslte_simd_b_load(&y[i]);
|
||||
|
||||
simd_s_t r = srslte_simd_b_sub(a, b);
|
||||
|
||||
srslte_simd_b_store(&z[i], r);
|
||||
}
|
||||
} else {
|
||||
for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) {
|
||||
simd_s_t a = srslte_simd_b_loadu(&x[i]);
|
||||
simd_s_t b = srslte_simd_b_loadu(&y[i]);
|
||||
|
||||
simd_s_t r = srslte_simd_b_sub(a, b);
|
||||
|
||||
srslte_simd_b_storeu(&z[i], r);
|
||||
}
|
||||
}
|
||||
#endif /* SRSLTE_SIMD_S_SIZE */
|
||||
|
||||
for(; i < len; i++){
|
||||
z[i] = x[i] - y[i];
|
||||
}
|
||||
}
|
||||
|
||||
void srslte_vec_prod_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, const int len) {
|
||||
int i = 0;
|
||||
#if SRSLTE_SIMD_S_SIZE
|
||||
|
@ -191,12 +220,69 @@ void srslte_vec_prod_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, co
|
|||
}
|
||||
}
|
||||
|
||||
void srslte_vec_neg_sss_simd(const int16_t *x, const int16_t *y, int16_t *z, const int len) {
|
||||
int i = 0;
|
||||
#if SRSLTE_SIMD_S_SIZE
|
||||
if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) {
|
||||
for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) {
|
||||
simd_s_t a = srslte_simd_s_load(&x[i]);
|
||||
simd_s_t b = srslte_simd_s_load(&y[i]);
|
||||
|
||||
simd_s_t r = srslte_simd_s_neg(a, b);
|
||||
|
||||
srslte_simd_s_store(&z[i], r);
|
||||
}
|
||||
} else {
|
||||
for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) {
|
||||
simd_s_t a = srslte_simd_s_loadu(&x[i]);
|
||||
simd_s_t b = srslte_simd_s_loadu(&y[i]);
|
||||
|
||||
simd_s_t r = srslte_simd_s_neg(a, b);
|
||||
|
||||
srslte_simd_s_storeu(&z[i], r);
|
||||
}
|
||||
}
|
||||
#endif /* SRSLTE_SIMD_S_SIZE */
|
||||
|
||||
for(; i < len; i++){
|
||||
z[i] = y[i]<0?-x[i]:x[i];
|
||||
}
|
||||
}
|
||||
|
||||
void srslte_vec_neg_bbb_simd(const int8_t *x, const int8_t *y, int8_t *z, const int len) {
|
||||
int i = 0;
|
||||
#if SRSLTE_SIMD_B_SIZE
|
||||
if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) {
|
||||
for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) {
|
||||
simd_s_t a = srslte_simd_b_load(&x[i]);
|
||||
simd_s_t b = srslte_simd_b_load(&y[i]);
|
||||
|
||||
simd_s_t r = srslte_simd_b_neg(a, b);
|
||||
|
||||
srslte_simd_b_store(&z[i], r);
|
||||
}
|
||||
} else {
|
||||
for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) {
|
||||
simd_s_t a = srslte_simd_b_loadu(&x[i]);
|
||||
simd_s_t b = srslte_simd_b_loadu(&y[i]);
|
||||
|
||||
simd_s_t r = srslte_simd_b_neg(a, b);
|
||||
|
||||
srslte_simd_b_storeu(&z[i], r);
|
||||
}
|
||||
}
|
||||
#endif /* SRSLTE_SIMD_S_SIZE */
|
||||
|
||||
for(; i < len; i++){
|
||||
z[i] = y[i]<0?-x[i]:x[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* No improvement with AVX */
|
||||
void srslte_vec_lut_sss_simd(const short *x, const unsigned short *lut, short *y, const int len) {
|
||||
int i = 0;
|
||||
#ifdef LV_HAVE_SSE
|
||||
#if CMAKE_BUILD_TYPE!=Debug
|
||||
|
||||
#ifndef DEBUG_MODE
|
||||
if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(lut)) {
|
||||
for (; i < len - 7; i += 8) {
|
||||
__m128i xVal = _mm_load_si128((__m128i *) &x[i]);
|
||||
|
@ -228,6 +314,53 @@ void srslte_vec_lut_sss_simd(const short *x, const unsigned short *lut, short *y
|
|||
}
|
||||
}
|
||||
|
||||
void srslte_vec_lut_bbb_simd(const int8_t *x, const unsigned short *lut, int8_t *y, const int len) {
|
||||
int i = 0;
|
||||
#ifdef LV_HAVE_SSE
|
||||
#ifndef DEBUG_MODE
|
||||
if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(lut)) {
|
||||
for (; i < len - 15; i += 16) {
|
||||
__m128i xVal = _mm_load_si128((__m128i *) &x[i]);
|
||||
__m128i lutVal1 = _mm_load_si128((__m128i *) &lut[i]);
|
||||
__m128i lutVal2 = _mm_load_si128((__m128i *) &lut[i+8]);
|
||||
|
||||
for (int k = 0; k < 8; k++) {
|
||||
int8_t x = (int8_t) _mm_extract_epi8(xVal, k);
|
||||
uint16_t l = (uint16_t) _mm_extract_epi16(lutVal1, k);
|
||||
y[l] = (char) x;
|
||||
}
|
||||
for (int k = 0; k < 8; k++) {
|
||||
int8_t x = (int8_t) _mm_extract_epi8(xVal, k+8);
|
||||
uint16_t l = (uint16_t) _mm_extract_epi16(lutVal2, k);
|
||||
y[l] = (char) x;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (; i < len - 15; i += 16) {
|
||||
__m128i xVal = _mm_loadu_si128((__m128i *) &x[i]);
|
||||
__m128i lutVal1 = _mm_loadu_si128((__m128i *) &lut[i]);
|
||||
__m128i lutVal2 = _mm_loadu_si128((__m128i *) &lut[i+8]);
|
||||
|
||||
for (int k = 0; k < 8; k++) {
|
||||
int8_t x = (int8_t) _mm_extract_epi8(xVal, k);
|
||||
uint16_t l = (uint16_t) _mm_extract_epi16(lutVal1, k);
|
||||
y[l] = (char) x;
|
||||
}
|
||||
for (int k = 0; k < 8; k++) {
|
||||
int8_t x = (int8_t) _mm_extract_epi8(xVal, k+8);
|
||||
uint16_t l = (uint16_t) _mm_extract_epi16(lutVal2, k);
|
||||
y[l] = (char) x;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
for (; i < len; i++) {
|
||||
y[lut[i]] = x[i];
|
||||
}
|
||||
}
|
||||
|
||||
void srslte_vec_convert_if_simd(const int16_t *x, float *z, const float scale, const int len) {
|
||||
int i = 0;
|
||||
const float gain = 1.0f / scale;
|
||||
|
@ -295,6 +428,73 @@ void srslte_vec_convert_fi_simd(const float *x, int16_t *z, const float scale, c
|
|||
}
|
||||
}
|
||||
|
||||
#define SRSLTE_IS_ALIGNED_SSE(PTR) (((size_t)(PTR) & 0x0F) == 0)
|
||||
|
||||
void srslte_vec_convert_fb_simd(const float *x, int8_t *z, const float scale, const int len) {
|
||||
int i = 0;
|
||||
|
||||
// Force the use of SSE here instead of AVX since the implementations requires too many permutes across 128-bit boundaries
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
__m128 s = _mm_set1_ps(scale);
|
||||
if (SRSLTE_IS_ALIGNED_SSE(x) && SRSLTE_IS_ALIGNED_SSE(z)) {
|
||||
for (; i < len - 16 + 1; i += 16) {
|
||||
__m128 a = _mm_load_ps(&x[i]);
|
||||
__m128 b = _mm_load_ps(&x[i + 1*4]);
|
||||
__m128 c = _mm_load_ps(&x[i + 2*4]);
|
||||
__m128 d = _mm_load_ps(&x[i + 3*4]);
|
||||
|
||||
__m128 sa = _mm_mul_ps(a, s);
|
||||
__m128 sb = _mm_mul_ps(b, s);
|
||||
__m128 sc = _mm_mul_ps(c, s);
|
||||
__m128 sd = _mm_mul_ps(d, s);
|
||||
|
||||
__m128i ai = _mm_cvttps_epi32(sa);
|
||||
__m128i bi = _mm_cvttps_epi32(sb);
|
||||
__m128i ci = _mm_cvttps_epi32(sc);
|
||||
__m128i di = _mm_cvttps_epi32(sd);
|
||||
__m128i ab = _mm_packs_epi32(ai, bi);
|
||||
__m128i cd = _mm_packs_epi32(ci, di);
|
||||
|
||||
__m128i i8 =_mm_packs_epi16(ab, cd);
|
||||
|
||||
_mm_store_si128((__m128i*)&z[i], i8);
|
||||
}
|
||||
} else {
|
||||
for (; i < len - 16 + 1; i += 16) {
|
||||
__m128 a = _mm_load_ps(&x[i]);
|
||||
__m128 b = _mm_load_ps(&x[i + 1*4]);
|
||||
__m128 c = _mm_load_ps(&x[i + 2*4]);
|
||||
__m128 d = _mm_load_ps(&x[i + 3*4]);
|
||||
|
||||
__m128 sa = _mm_mul_ps(a, s);
|
||||
__m128 sb = _mm_mul_ps(b, s);
|
||||
__m128 sc = _mm_mul_ps(c, s);
|
||||
__m128 sd = _mm_mul_ps(d, s);
|
||||
|
||||
__m128i ai = _mm_cvttps_epi32(sa);
|
||||
__m128i bi = _mm_cvttps_epi32(sb);
|
||||
__m128i ci = _mm_cvttps_epi32(sc);
|
||||
__m128i di = _mm_cvttps_epi32(sd);
|
||||
__m128i ab = _mm_packs_epi32(ai, bi);
|
||||
__m128i cd = _mm_packs_epi32(ci, di);
|
||||
|
||||
__m128i i8 =_mm_packs_epi16(ab, cd);
|
||||
|
||||
_mm_storeu_si128((__m128i*)&z[i], i8);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_NEON
|
||||
#warning srslte_vec_convert_fb_simd not implemented in neon
|
||||
#endif /* HAVE_NEON */
|
||||
|
||||
for(; i < len; i++){
|
||||
z[i] = (int8_t) (x[i] * scale);
|
||||
}
|
||||
}
|
||||
|
||||
float srslte_vec_acc_ff_simd(const float *x, const int len) {
|
||||
int i = 0;
|
||||
float acc_sum = 0.0f;
|
||||
|
|
|
@ -141,18 +141,20 @@ nof_ctrl_symbols = 3
|
|||
#####################################################################
|
||||
# Expert configuration options
|
||||
#
|
||||
# pdsch_max_its: Maximum number of turbo decoder iterations (Default 4)
|
||||
# pusch_max_its: Maximum number of turbo decoder iterations (Default 4)
|
||||
# pusch_8bit_decoder: Use 8-bit for LLR representation and turbo decoder trellis computation (Experimental)
|
||||
# nof_phy_threads: Selects the number of PHY threads (maximum 4, minimum 1, default 2)
|
||||
# metrics_period_secs: Sets the period at which metrics are requested from the UE.
|
||||
# pregenerate_signals: Pregenerate uplink signals after attach. Improves CPU performance.
|
||||
# tx_amplitude: Transmit amplitude factor (set 0-1 to reduce PAPR)
|
||||
# link_failure_nof_err: Number of PUSCH failures after which a radio-link failure is triggered.
|
||||
# a link failure is when SNR<0 and CRC=KO
|
||||
# max_prach_offset_us: Maximum allowed RACH offset (in us)
|
||||
# max_prach_offset_us: Maximum allowed RACH offset (in us)
|
||||
#
|
||||
#####################################################################
|
||||
[expert]
|
||||
#pdsch_max_its = 4
|
||||
#pusch_max_its = 8 # These are half iterations
|
||||
#pusch_8bit_decoder = false
|
||||
#nof_phy_threads = 2
|
||||
#pregenerate_signals = false
|
||||
#tx_amplitude = 0.6
|
||||
|
|
|
@ -41,6 +41,7 @@ namespace srsenb {
|
|||
typedef struct {
|
||||
float max_prach_offset_us;
|
||||
int pusch_max_its;
|
||||
bool pusch_8bit_decoder;
|
||||
float tx_amplitude;
|
||||
int nof_phy_threads;
|
||||
std::string equalizer_mode;
|
||||
|
|
|
@ -157,10 +157,14 @@ void parse_args(all_args_t *args, int argc, char* argv[]) {
|
|||
"Pregenerate uplink signals after attach. Improves CPU performance.")
|
||||
|
||||
("expert.pusch_max_its",
|
||||
bpo::value<int>(&args->expert.phy.pusch_max_its)->default_value(4),
|
||||
bpo::value<int>(&args->expert.phy.pusch_max_its)->default_value(8),
|
||||
"Maximum number of turbo decoder iterations")
|
||||
|
||||
("expert.tx_amplitude",
|
||||
("expert.pusch_8bit_decoder",
|
||||
bpo::value<bool>(&args->expert.phy.pusch_8bit_decoder)->default_value(false),
|
||||
"Use 8-bit for LLR representation and turbo decoder trellis computation (Experimental)")
|
||||
|
||||
("expert.tx_amplitude",
|
||||
bpo::value<float>(&args->expert.phy.tx_amplitude)->default_value(0.6),
|
||||
"Transmit amplitude factor")
|
||||
|
||||
|
|
|
@ -153,7 +153,11 @@ void phch_worker::init(phch_common* phy_, srslte::log *log_h_)
|
|||
srslte_enb_dl_set_amp(&enb_dl, phy->params.tx_amplitude);
|
||||
|
||||
Info("Worker %d configured cell %d PRB\n", get_id(), phy->cell.nof_prb);
|
||||
|
||||
|
||||
if (phy->params.pusch_8bit_decoder) {
|
||||
enb_ul.pusch.llr_is_8bit = true;
|
||||
enb_ul.pusch.ul_sch.llr_is_8bit = true;
|
||||
}
|
||||
initiated = true;
|
||||
running = true;
|
||||
|
||||
|
|
|
@ -214,7 +214,7 @@ void parse_args(all_args_t *args, int argc, char *argv[]) {
|
|||
"Sets the noise estimation algorithm. (Default refs)")
|
||||
|
||||
("expert.pdsch_max_its",
|
||||
bpo::value<int>(&args->expert.phy.pdsch_max_its)->default_value(4),
|
||||
bpo::value<int>(&args->expert.phy.pdsch_max_its)->default_value(8),
|
||||
"Maximum number of turbo decoder iterations")
|
||||
|
||||
("expert.attach_enable_64qam",
|
||||
|
@ -307,7 +307,11 @@ void parse_args(all_args_t *args, int argc, char *argv[]) {
|
|||
bpo::value<bool>(&args->expert.phy.pdsch_csi_enabled)->default_value(true),
|
||||
"Stores the Channel State Information and uses it for weightening the softbits. It is only used in TM1.")
|
||||
|
||||
("rf_calibration.tx_corr_dc_gain", bpo::value<float>(&args->rf_cal.tx_corr_dc_gain)->default_value(0.0),
|
||||
("expert.pdsch_8bit_decoder",
|
||||
bpo::value<bool>(&args->expert.phy.pdsch_8bit_decoder)->default_value(false),
|
||||
"Use 8-bit for LLR representation and turbo decoder trellis computation (Experimental)")
|
||||
|
||||
("rf_calibration.tx_corr_dc_gain", bpo::value<float>(&args->rf_cal.tx_corr_dc_gain)->default_value(0.0),
|
||||
"TX DC offset gain correction")
|
||||
("rf_calibration.tx_corr_dc_phase", bpo::value<float>(&args->rf_cal.tx_corr_dc_phase)->default_value(0.0),
|
||||
"TX DC offset phase correction")
|
||||
|
|
|
@ -139,6 +139,10 @@ bool phch_worker::init(uint32_t max_prb, srslte::log *log_h, srslte::log *log_ph
|
|||
return false;
|
||||
}
|
||||
|
||||
if (phy->args->pdsch_8bit_decoder) {
|
||||
ue_dl.pdsch.llr_is_8bit = true;
|
||||
ue_dl.pdsch.dl_sch.llr_is_8bit = true;
|
||||
}
|
||||
|
||||
srslte_chest_dl_set_rsrp_neighbour(&ue_dl.chest, true);
|
||||
srslte_chest_dl_average_subframe(&ue_dl.chest, phy->args->average_subframe_enabled);
|
||||
|
|
|
@ -204,6 +204,8 @@ enable = false
|
|||
# pdsch_csi_enabled: Stores the Channel State Information and uses it for weightening the softbits. It is only
|
||||
# used in TM1. It is True by default.
|
||||
#
|
||||
# pdsch_8bit_decoder: Use 8-bit for LLR representation and turbo decoder trellis computation (Experimental)
|
||||
#
|
||||
#####################################################################
|
||||
[expert]
|
||||
#ip_netmask = 255.255.255.0
|
||||
|
@ -215,7 +217,7 @@ enable = false
|
|||
#cqi_fixed = 10
|
||||
#snr_ema_coeff = 0.1
|
||||
#snr_estim_alg = refs
|
||||
#pdsch_max_its = 4
|
||||
#pdsch_max_its = 8 # These are half iterations
|
||||
#attach_enable_64qam = false
|
||||
#nof_phy_threads = 2
|
||||
#equalizer_mode = mmse
|
||||
|
@ -234,6 +236,7 @@ enable = false
|
|||
#metrics_period_secs = 1
|
||||
#metrics_csv_filename = /tmp/ue_metrics.csv
|
||||
#pdsch_csi_enabled = true
|
||||
#pdsch_8bit_decoder = false
|
||||
|
||||
# CFO related values
|
||||
#cfo_is_doppler = false
|
||||
|
|
Loading…
Reference in New Issue