Merge branch 'next' into next_novolk

This commit is contained in:
Ismael Gomez 2017-06-22 18:08:14 +02:00
commit 681a2455d9
42 changed files with 2855 additions and 970 deletions

View File

@ -1,6 +1,15 @@
Change Log for Releases
==============================
## 002.000.000
* Added fully functional srsENB to srsLTE code
* Merged srsUE code into srsLTE and reestructured PHY code
* Added support for SoapySDR devices (eg LimeSDR)
* Fixed issues in RLC AM
* Added support for NEON and AVX in many kernels and Viterbi decoder
* Added support for CPU affinity
* Other minor bug-fixes and new features
## 001.004.000
* Fixed issue in rv for format1C causing incorrect SIB1 decoding in some networks
* Improved PDCCH decoding BER (fixed incorrect trellis initialization)

View File

@ -213,6 +213,7 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
endif (HAVE_AVX2)
endif(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
ADD_CXX_COMPILER_FLAG_IF_AVAILABLE("-Werror=incompatible-pointer-types" HAVE_ERROR_INCOMPATIBLE)
if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${GCC_ARCH} -Wall -Wno-comment -Wno-write-strings -Wno-format-extra-args -Winline -Wno-unused-result -Wno-format -std=c99 -D_GNU_SOURCE")

143
README.md
View File

@ -3,31 +3,61 @@ srsLTE
[![Coverity Scan Build Status](https://scan.coverity.com/projects/10045/badge.svg)](https://scan.coverity.com/projects/10045)
srsLTE is a free and open-source LTE library for SDR UE and eNodeB developed by SRS (www.softwareradiosystems.com). The library is highly modular with minimum inter-module or external dependencies. It is entirely written in C and, if available in the system, uses the acceleration library VOLK distributed in GNURadio.
srsLTE is a free and open-source LTE software suite developed by SRS (www.softwareradiosystems.com).
**srsLTE is used by srsUE, a full stack (PHY to IP) implementation of an LTE UE. srsUE is available at https://github.com/srslte/srsue**
It includes:
* srsUE - a complete SDR LTE UE application featuring all layers from PHY to IP
* srsENB - a complete SDR LTE eNodeB application
* a highly modular set of common libraries for PHY, MAC, RLC, PDCP, RRC, NAS, S1AP and GW layers.
srsLTE is released under the AGPLv3 license and uses software from the OpenLTE project (http://sourceforge.net/projects/openlte) for some security functions and for RRC/NAS message parsing.
The srsLTE software license is AGPLv3.
Common Features
---------------
Current Features:
* LTE Release 8 compliant
* FDD configuration
* Tested bandwidths: 1.4, 3, 5, 10, 15 and 20 MHz
* Transmission mode 1 (single antenna) and 2 (transmit diversity)
* Cell search and synchronization procedure for the UE
* All DL channels/signals are supported for UE and eNodeB side: PSS, SSS, PBCH, PCFICH, PHICH, PDCCH, PDSCH
* All UL channels/signals are supported for UE side: PRACH, PUSCH, PUCCH, SRS
* Frequency-based ZF and MMSE equalizer
* Highly optimized Turbo Decoder available in Intel SSE4.1/AVX (+100 Mbps) and standard C (+25 Mbps)
* UE receiver tested and verified with Amarisoft LTE 100 eNodeB and commercial LTE networks (Telefonica Spain, Three.ie and Eircom in Ireland)
* MAC, RLC, PDCP, RRC, NAS, S1AP and GW layers
* Detailed log system with per-layer log levels and hex dumps
* MAC layer wireshark packet capture
* Command-line trace metrics
* Detailed input configuration files
Missing Features:
* Closed-loop power control
* Semi-Persistent Scheduling
srsUE Features
--------------
* Cell search and synchronization procedure for the UE
* Soft USIM supporting Milenage and XOR authentication
* Virtual network interface *tun_srsue* created upon network attach
* 75 Mbps DL in 20 MHz SISO configuration in i7 Quad-Core CPU.
* 36 Mbps DL in 10 MHz SISO configuration in i5 Dual-Core CPU.
srsUE has been fully tested and validated with the following network equipment:
* Amarisoft LTE100 eNodeB and EPC
* Nokia FlexiRadio family FSMF system module with 1800MHz FHED radio module and TravelHawk EPC simulator
* Huawei DBS3900
* Octasic Flexicell LTE-FDD NIB
srsENB Features
---------------
* Round Robin MAC scheduler with FAPI-like C++ API
* SR support
* Periodic and Aperiodic CQI feedback support
* Standard S1AP and GTP-U interfaces to the Core Network
* Tested up to 75 Mbps DL in SISO configuration with commercial UEs
srsENB has been tested and validated with the following handsets:
* LG Nexus 5
* LG Nexus 4
* Motorola Moto G4 plus
Hardware
========
--------
The library currently supports the Ettus Universal Hardware Driver (UHD) and the bladeRF driver. Thus, any hardware supported by UHD or bladeRF can be used. There is no sampling rate conversion, therefore the hardware should support 30.72 MHz clock in order to work correctly with LTE sampling frequencies and decode signals from live LTE base stations.
@ -35,15 +65,29 @@ We have tested the following hardware:
* USRP B210
* USRP X300
* bladeRF
* limeSDR
Download & Install Instructions
=================================
Build Instructions
------------------
* Mandatory requirements:
* Common:
* libfftw http://www.fftw.org/
* PolarSSL/mbedTLS https://tls.mbed.org
* srsUE:
* Boost: http://www.boost.org
* srsENB:
* Boost: http://www.boost.org
* lksctp: http://lksctp.sourceforge.net/
* config: http://www.hyperrealm.com/libconfig/
* Mandatory dependencies:
* libfftw
* Optional requirements:
* srsgui: for real-time plotting. Download it here: https://github.com/srslte/srsgui
* VOLK: if the VOLK library and headers are detected, they will be used for accelerating some signal processing functions.
* srsgui: https://github.com/srslte/srsgui - for real-time plotting.
* VOLK: https://github.com/gnuradio/volk - if the VOLK library and headers are detected, they will be used to accelerate some signal processing functions.
* RF front-end driver:
* UHD: https://github.com/EttusResearch/uhd
* BladeRF: https://github.com/Nuand/bladeRF
Download and build srsLTE:
```
@ -55,64 +99,27 @@ cmake ../
make
```
The library can also be installed using the command ```sudo make install```.
The software suite can also be installed using the command ```sudo make install```.
Running srsLTE Examples
========================
Execution Instructions
----------------------
* SIB1 reception and UE measurement from commercial LTE networks:
The srsUE and srsENB applications include example configuration files. Execute the applications with root privileges to enable real-time thread priorities and to permit creation of virtual network interfaces.
### srsUE
Run the srsUE application as follows:
```
lte/examples/pdsch_ue -f [frequency_in_Hz]
```
Where -f is the LTE channel frequency.
* eNodeB to UE Downlink PHY test
You will need two computers, each equipped with a USRP. At the transmitter side, run:
```
lte/examples/pdsch_enodeb -f [frequency_in_Hz] [-h for more commands]
sudo ./srsue ue.conf
```
At the receiver run:
### srsENB
As the srsLTE software suite does not include EPC functionality, a separate EPC is required to run srsENB. Run the application as follows:
```
lte/examples/pdsch_ue -r 1234 -f [frequency_in_Hz]
sudo ./srsenb enb.conf
```
At the transmitter console, it is possible to change the Modulation and Coding Scheme (MCS) by typing a new number (between 0 and 28) and pressing Enter.
The output at the receiver should look something similar to the following video. In this example, we removed the transmitter and receiver antennas in the middle of the demonstration, showing how reception is still possible (despite with some erros).
https://www.dropbox.com/s/txh1nuzdb0igq5n/demo_pbch.ogv
![Screenshopt of the PBCH example output](pbch_capture.png "Screenshopt of the PBCH example output")
* Video over Downlink PHY (eNodeB to UE)
The previous example sends random bits to the UE. It is possible to open a TCP socket and stream video over the LTE PHY DL wireless connection. At the transmitter side, run the following command:
```
lte/examples/pdsch_enodeb -f [frequency_in_Hz] -u 2000 [-h for more commands]
```
The argument -u 2000 will open port 2000 for listening for TCP connections. Set a high-order MCS, like 16 by typing 16 in the eNodeB console and pressing Enter.
```
lte/examples/pdsch_ue -r 1234 -u 2001 -U 127.0.0.1 -f [frequency_in_Hz]
```
The arguments -u 2001 -U 127.0.0.1 will forward the data that was injected at the eNodeB to address:port indicated by the argument. Once you have the system running, you can transmit some useful data, like a video stream. At the transmitter side, run:
```
avconv -f video4linux2 -i /dev/video0 -c:v mp4 -f mpegts tcp://127.0.0.1:2000
```
to stream the video captured from the webcam throught the local host port 2000. At the receiver, run:
```
avplay tcp://127.0.0.1:2001?listen -analyzeduration 100 -loglevel verbose
```
to watch the video.
Support
========

View File

@ -18,7 +18,7 @@
# and at http://www.gnu.org/licenses/.
#
SET(SRSLTE_VERSION_MAJOR 001)
SET(SRSLTE_VERSION_MINOR 004)
SET(SRSLTE_VERSION_MAJOR 002)
SET(SRSLTE_VERSION_MINOR 000)
SET(SRSLTE_VERSION_PATCH 000)
SET(SRSLTE_VERSION_STRING "${SRSLTE_VERSION_MAJOR}.${SRSLTE_VERSION_MINOR}.${SRSLTE_VERSION_PATCH}")

View File

@ -53,6 +53,8 @@
#define SRSLTE_MAX_LAYERS 4
#define SRSLTE_MAX_CODEWORDS 2
#define SRSLTE_MAX_CODEBLOCKS 32
#define SRSLTE_LTE_CRC24A 0x1864CFB
#define SRSLTE_LTE_CRC24B 0X1800063
#define SRSLTE_LTE_CRC16 0x11021

View File

@ -52,12 +52,14 @@
#include "srslte/phy/fec/turbodecoder_gen.h"
#ifdef LV_HAVE_SSE
#include "srslte/phy/fec/turbodecoder_sse.h"
#include "srslte/phy/fec/turbodecoder_simd.h"
#else
#define SRSLTE_TDEC_NPAR 1
#endif
typedef struct SRSLTE_API {
#ifdef LV_HAVE_SSE
srslte_tdec_sse_t tdec_sse;
srslte_tdec_simd_t tdec_simd;
#else
float *input_conv;
srslte_tdec_gen_t tdec_gen;
@ -69,7 +71,16 @@ SRSLTE_API int srslte_tdec_init(srslte_tdec_t * h,
SRSLTE_API void srslte_tdec_free(srslte_tdec_t * h);
SRSLTE_API int srslte_tdec_reset(srslte_tdec_t * h, uint32_t long_cb);
SRSLTE_API int srslte_tdec_reset(srslte_tdec_t * h,
uint32_t long_cb);
SRSLTE_API int srslte_tdec_reset_cb(srslte_tdec_t * h,
uint32_t cb_idx);
SRSLTE_API int srslte_tdec_get_nof_iterations_cb(srslte_tdec_t * h,
uint32_t cb_idx);
SRSLTE_API int srslte_tdec_get_nof_parallel(srslte_tdec_t * h);
SRSLTE_API void srslte_tdec_iteration(srslte_tdec_t * h,
int16_t* input,
@ -89,4 +100,27 @@ SRSLTE_API int srslte_tdec_run_all(srslte_tdec_t * h,
uint32_t nof_iterations,
uint32_t long_cb);
SRSLTE_API void srslte_tdec_iteration_par(srslte_tdec_t * h,
int16_t* input[SRSLTE_TDEC_NPAR],
uint32_t long_cb);
SRSLTE_API void srslte_tdec_decision_par(srslte_tdec_t * h,
uint8_t *output[SRSLTE_TDEC_NPAR],
uint32_t long_cb);
SRSLTE_API void srslte_tdec_decision_byte_par(srslte_tdec_t * h,
uint8_t *output[SRSLTE_TDEC_NPAR],
uint32_t long_cb);
SRSLTE_API void srslte_tdec_decision_byte_par_cb(srslte_tdec_t * h,
uint8_t *output,
uint32_t cb_idx,
uint32_t long_cb);
SRSLTE_API int srslte_tdec_run_all_par(srslte_tdec_t * h,
int16_t * input[SRSLTE_TDEC_NPAR],
uint8_t *output[SRSLTE_TDEC_NPAR],
uint32_t nof_iterations,
uint32_t long_cb);
#endif

View File

@ -66,6 +66,8 @@ typedef struct SRSLTE_API {
float *parity;
int current_cbidx;
uint32_t current_cb_len;
uint32_t n_iter;
srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES];
} srslte_tdec_gen_t;

View File

@ -0,0 +1,135 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 Software Radio Systems Limited
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/**********************************************************************************************
* File: turbodecoder.h
*
* Description: Turbo Decoder.
* Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent
* encoders and one turbo code internal interleaver. The coding rate of turbo
* encoder is 1/3.
* MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder.
*
* Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2
*********************************************************************************************/
#ifndef TURBODECODER_SSE_
#define TURBODECODER_SSE_
#include "srslte/config.h"
#include "srslte/phy/fec/tc_interl.h"
#include "srslte/phy/fec/cbsegm.h"
//#define ENABLE_SIMD_INTER
// The constant SRSLTE_TDEC_NPAR defines the maximum number of parallel CB supported by all SIMD decoders
#ifdef ENABLE_SIMD_INTER
#include "srslte/phy/fec/turbodecoder_simd_inter.h"
#if LV_HAVE_AVX2
#define SRSLTE_TDEC_NPAR_INTRA 2
#else
#define SRSLTE_TDEC_NPAR_INTRA 1
#endif
#else
#if LV_HAVE_AVX2
#define SRSLTE_TDEC_NPAR 2
#else
#define SRSLTE_TDEC_NPAR 1
#endif
#endif
#define SRSLTE_TCOD_RATE 3
#define SRSLTE_TCOD_TOTALTAIL 12
#define SRSLTE_TCOD_MAX_LEN_CB 6144
#define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL)
typedef struct SRSLTE_API {
uint32_t max_long_cb;
uint32_t max_par_cb;
int16_t *alpha;
int16_t *branch;
} map_gen_t;
typedef struct SRSLTE_API {
uint32_t max_long_cb;
uint32_t max_par_cb;
map_gen_t dec;
int16_t *app1[SRSLTE_TDEC_NPAR];
int16_t *app2[SRSLTE_TDEC_NPAR];
int16_t *ext1[SRSLTE_TDEC_NPAR];
int16_t *ext2[SRSLTE_TDEC_NPAR];
int16_t *syst[SRSLTE_TDEC_NPAR];
int16_t *parity0[SRSLTE_TDEC_NPAR];
int16_t *parity1[SRSLTE_TDEC_NPAR];
int cb_mask;
int current_cbidx;
srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES];
int n_iter[SRSLTE_TDEC_NPAR];
} srslte_tdec_simd_t;
SRSLTE_API int srslte_tdec_simd_init(srslte_tdec_simd_t * h,
uint32_t max_par_cb,
uint32_t max_long_cb);
SRSLTE_API void srslte_tdec_simd_free(srslte_tdec_simd_t * h);
SRSLTE_API int srslte_tdec_simd_reset(srslte_tdec_simd_t * h,
uint32_t long_cb);
SRSLTE_API int srslte_tdec_simd_get_nof_iterations_cb(srslte_tdec_simd_t * h,
uint32_t cb_idx);
SRSLTE_API int srslte_tdec_simd_reset_cb(srslte_tdec_simd_t * h,
uint32_t cb_idx);
SRSLTE_API void srslte_tdec_simd_iteration(srslte_tdec_simd_t * h,
int16_t * input[SRSLTE_TDEC_NPAR],
uint32_t long_cb);
SRSLTE_API void srslte_tdec_simd_decision(srslte_tdec_simd_t * h,
uint8_t *output[SRSLTE_TDEC_NPAR],
uint32_t long_cb);
SRSLTE_API void srslte_tdec_simd_decision_byte(srslte_tdec_simd_t * h,
uint8_t *output[SRSLTE_TDEC_NPAR],
uint32_t long_cb);
SRSLTE_API void srslte_tdec_simd_decision_byte_cb(srslte_tdec_simd_t * h,
uint8_t *output,
uint32_t cbidx,
uint32_t long_cb);
SRSLTE_API int srslte_tdec_simd_run_all(srslte_tdec_simd_t * h,
int16_t * input[SRSLTE_TDEC_NPAR],
uint8_t *output[SRSLTE_TDEC_NPAR],
uint32_t nof_iterations,
uint32_t long_cb);
#endif

View File

@ -0,0 +1,119 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 Software Radio Systems Limited
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/**********************************************************************************************
* File: turbodecoder.h
*
* Description: Turbo Decoder.
* Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent
* encoders and one turbo code internal interleaver. The coding rate of turbo
* encoder is 1/3.
* MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder.
*
* Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2
*********************************************************************************************/
#ifndef TURBODECODER_SSE_INTER_
#define TURBODECODER_SSE_INTER_
/** This is an simd inter-frame parallel turbo decoder. Parallizes 8 code-blocks using SSE
* This implementation is currently not functional and not used by the rest of the code
*/
#include "srslte/config.h"
#include "srslte/phy/fec/tc_interl.h"
#include "srslte/phy/fec/cbsegm.h"
#if LV_HAVE_AVX2
#define SRSLTE_TDEC_NPAR 16
#else
#define SRSLTE_TDEC_NPAR 8
#endif
typedef struct SRSLTE_API {
int max_long_cb;
int16_t *syst0;
int16_t *parity0;
int16_t *syst1;
int16_t *parity1;
int16_t *llr1;
int16_t *llr2;
int16_t *w;
int16_t *alpha;
uint32_t max_par_cb;
int current_cbidx;
uint32_t current_long_cb;
srslte_tc_interl_t interleaver[SRSLTE_NOF_TC_CB_SIZES];
int n_iter[SRSLTE_TDEC_NPAR];
} srslte_tdec_simd_inter_t;
SRSLTE_API int srslte_tdec_simd_inter_init(srslte_tdec_simd_inter_t * h,
uint32_t max_par_cb,
uint32_t max_long_cb);
SRSLTE_API void srslte_tdec_simd_inter_free(srslte_tdec_simd_inter_t * h);
SRSLTE_API int srslte_tdec_simd_inter_reset(srslte_tdec_simd_inter_t * h,
uint32_t long_cb);
SRSLTE_API int srslte_tdec_simd_inter_get_nof_iterations_cb(srslte_tdec_simd_inter_t * h,
uint32_t cb_idx);
SRSLTE_API int srslte_tdec_simd_inter_reset_cb(srslte_tdec_simd_inter_t * h,
uint32_t cb_idx);
SRSLTE_API void srslte_tdec_simd_inter_iteration(srslte_tdec_simd_inter_t * h,
int16_t * input[SRSLTE_TDEC_NPAR],
uint32_t nof_cb,
uint32_t long_cb);
SRSLTE_API void srslte_tdec_simd_inter_decision(srslte_tdec_simd_inter_t * h,
uint8_t *output[SRSLTE_TDEC_NPAR],
uint32_t nof_cb,
uint32_t long_cb);
SRSLTE_API void srslte_tdec_simd_inter_decision_byte(srslte_tdec_simd_inter_t * h,
uint8_t *output[SRSLTE_TDEC_NPAR],
uint32_t nof_cb,
uint32_t long_cb);
SRSLTE_API void srslte_tdec_simd_inter_decision_byte_cb(srslte_tdec_simd_inter_t * h,
uint8_t *output,
uint32_t cbidx,
uint32_t long_cb);
SRSLTE_API int srslte_tdec_simd_inter_run_all(srslte_tdec_simd_inter_t * h,
int16_t *input[SRSLTE_TDEC_NPAR],
uint8_t *output[SRSLTE_TDEC_NPAR],
uint32_t nof_iterations,
uint32_t nof_cb,
uint32_t long_cb);
#endif

View File

@ -37,44 +37,52 @@ extern "C" {
SRSLTE_API int srslte_vec_dot_prod_sss_sse(short *x, short *y, uint32_t len);
SRSLTE_API int srslte_vec_dot_prod_sss_avx(short *x, short *y, uint32_t len);
SRSLTE_API int srslte_vec_dot_prod_sss_avx2(short *x, short *y, uint32_t len);
SRSLTE_API void srslte_vec_sum_sss_sse(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sum_sss_avx(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sum_sss_avx2(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sub_sss_sse(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sub_sss_avx(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sub_sss_avx2(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sum_fff_sse(float *x, float *y, float *z, uint32_t len);
SRSLTE_API void srslte_vec_sub_fff_sse(float *x, float *y, float *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_prod_fff_sse(float *x, float h, float *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_prod_ccc_sse(cf_t *x, cf_t h, cf_t *z, uint32_t len);
SRSLTE_API void srslte_vec_prod_ccc_sse(cf_t *x,cf_t *y, cf_t *z, uint32_t len);
SRSLTE_API void srslte_vec_prod_sss_avx2(short *x, short *y, short *z, uint32_t len);
SRSLTE_API cf_t srslte_vec_dot_prod_conj_ccc_sse(cf_t *x, cf_t *y, uint32_t len);
SRSLTE_API void srslte_vec_prod_conj_ccc_sse(cf_t *x,cf_t *y, cf_t *z, uint32_t len);
SRSLTE_API cf_t srslte_vec_dot_prod_ccc_sse(cf_t *x, cf_t *y, uint32_t len);
SRSLTE_API void srslte_vec_sc_div2_sss_avx2(short *x, int k, short *z, uint32_t len);
SRSLTE_API void srslte_vec_abs_square_cf_sse(cf_t *x, float *z, uint32_t len);
SRSLTE_API void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_prod_sss_avx(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_div2_sss_sse(short *x, int n_rightshift, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_div2_sss_avx(short *x, int k, short *z, uint32_t len);
SRSLTE_API void srslte_vec_lut_sss_sse(short *x, unsigned short *lut, short *y, uint32_t len);
SRSLTE_API void srslte_vec_convert_fi_sse(float *x, int16_t *z, float scale, uint32_t len);
SRSLTE_API void srslte_vec_mult_scalar_cf_f_avx( cf_t *z,const cf_t *x,const float h,const uint32_t len);
#ifdef __cplusplus
}
#endif

View File

@ -81,28 +81,6 @@ else(MKL_FOUND)
target_link_libraries(srslte_phy ${FFTW3F_LIBRARIES})
endif(MKL_FOUND)
## This linkage is required for the examples and tests only
if(RF_FOUND)
target_link_libraries(srslte_phy)
if(UHD_FOUND)
target_link_libraries(srslte_phy ${UHD_LIBRARIES})
endif(UHD_FOUND)
if(BLADERF_FOUND)
target_link_libraries(srslte_phy ${BLADERF_LIBRARIES})
endif(BLADERF_FOUND)
if(LIMESDR_FOUND)
target_link_libraries(srslte_phy ${LIMESDR_LIBRARIES})
endif(LIMESDR_FOUND)
if(SOAPYSDR_FOUND)
target_link_libraries(srslte_phy ${SOAPYSDR_LIBRARIES})
endif(SOAPYSDR_FOUND)
endif(RF_FOUND)
if(VOLK_FOUND)
target_link_libraries(srslte_phy ${VOLK_LIBRARIES})

View File

@ -48,7 +48,13 @@ const uint32_t tc_cb_sizes[SRSLTE_NOF_TC_CB_SIZES] = { 40, 48, 56, 64, 72, 80, 8
4800, 4864, 4928, 4992, 5056, 5120, 5184, 5248, 5312, 5376, 5440, 5504,
5568, 5632, 5696, 5760, 5824, 5888, 5952, 6016, 6080, 6144 };
/* Calculate Codeblock Segmentation as in Section 5.1.2 of 36.212 */
/**
* Calculate Codeblock Segmentation parameters as in Section 5.1.2 of 36.212
*
* @param[out] s Output of code block segmentation calculation
* @param[in] tbs Input Transport Block Size in bits. CRC's will be added to this
* @return Error code
*/
int srslte_cbsegm(srslte_cbsegm_t *s, uint32_t tbs) {
uint32_t Bp, B, idx1;
int ret;
@ -104,6 +110,8 @@ int srslte_cbsegm(srslte_cbsegm_t *s, uint32_t tbs) {
/*
* Finds index of minimum K>=long_cb in Table 5.1.3-3 of 36.212
*
* @return I_TBS or error code
*/
int srslte_cbsegm_cbindex(uint32_t long_cb) {
int j = 0;
@ -120,6 +128,8 @@ int srslte_cbsegm_cbindex(uint32_t long_cb) {
/*
* Returns Turbo coder interleaver size for Table 5.1.3-3 (36.212) index
*
* @return Code block size in bits or error code
*/
int srslte_cbsegm_cbsize(uint32_t index) {
if (index < SRSLTE_NOF_TC_CB_SIZES) {
@ -129,6 +139,12 @@ int srslte_cbsegm_cbsize(uint32_t index) {
}
}
/**
* Check is code block size is valid for LTE Turbo Code
*
* @param[in] size Size of code block in bits
* @return true if Code Block size is allowed
*/
bool srslte_cbsegm_cbsize_isvalid(uint32_t size) {
for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
if (tc_cb_sizes[i] == size) {

View File

@ -33,6 +33,19 @@
#include "srslte/phy/fec/convcoder.h"
#include "parity.h"
/**
* Convolution encodes according to given parameters.
*
* q->R is rate
* q->tail_biting enables tail biting
* q->K is a parameter for tail biting
*
* @param[in] q Convolution coder parameters
* @param[in] input Unpacked bit array. Size frame_length
* @param[out] output Unpacked bit array. Size q->R*frame_length if q->tail_biting, else q->R*(frame_length + q->K - 1)
* @param[in] frame_length Number of bits in input_array
* @return Number of bits in output
*/
int srslte_convcoder_encode(srslte_convcoder_t *q, uint8_t *input, uint8_t *output, uint32_t frame_length) {
uint32_t sr;
uint32_t i,j;

View File

@ -39,6 +39,12 @@ uint8_t RM_PERM_CC_INV[NCOLS] =
{ 16, 0, 24, 8, 20, 4, 28, 12, 18, 2, 26, 10, 22, 6, 30, 14, 17, 1, 25, 9,
21, 5, 29, 13, 19, 3, 27, 11, 23, 7, 31, 15 };
/**
* Rate matching for convolution encoder
*
* @param[in] input Unpacked bit array. Size in_len
* @param[output] output Unpacked bit array. Size out_len <= in_len
*/
int srslte_rm_conv_tx(uint8_t *input, uint32_t in_len, uint8_t *output, uint32_t out_len) {
uint8_t tmp[3 * NCOLS * NROWS_MAX];

View File

@ -245,6 +245,20 @@ void srslte_rm_turbo_gentables() {
}
/**
* Rate matching for LTE Turbo Coder
*
* @param[out] w_buff Preallocated softbuffer
* @param[in] systematic Input code block in a byte array
* @param[in] parity Input code turbo coder parity bits in a byte array
* @param[out] output Rate matched output array of size out_len
* @param out_len Output buffer size to be filled with as many FEC bits as fit
* @param w_offset Start writing to output at this bit offset
* @param cb_idx Code block index. Used to lookup interleaver parameters
* @param rv_idx Redundancy Version Index. Indexed offset of FEC bits to copy
*
* @return Error code
*/
int srslte_rm_turbo_tx_lut(uint8_t *w_buff, uint8_t *systematic, uint8_t *parity, uint8_t *output,
uint32_t cb_idx, uint32_t out_len,
uint32_t w_offset, uint32_t rv_idx)
@ -289,6 +303,15 @@ int srslte_rm_turbo_tx_lut(uint8_t *w_buff, uint8_t *systematic, uint8_t *parity
}
}
/**
* Undoes rate matching for LTE Turbo Coder. Expands rate matched buffer to full size buffer.
*
* @param[in] input Input buffer of size in_len
* @param[out] output Output buffer of size 3*srslte_cbsegm_cbsize(cb_idx)+12
* @param[in] cb_idx Code block table index
* @param[in] rv_idx Redundancy Version from DCI control message
* @return Error code
*/
int srslte_rm_turbo_rx_lut(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
{

View File

@ -46,6 +46,7 @@ uint32_t seed = 0;
int K = -1;
#define MAX_ITERATIONS 10
int nof_cb = 1;
int nof_iterations = MAX_ITERATIONS;
int test_known_data = 0;
int test_errors = 0;
@ -59,6 +60,7 @@ void usage(char *prog) {
printf("Usage: %s [nlesv]\n", prog);
printf(
"\t-k Test with known data (ignores frame_length) [Default disabled]\n");
printf("\t-c nof_cb in parallel [Default %d]\n", nof_cb);
printf("\t-i nof_iterations [Default %d]\n", nof_iterations);
printf("\t-n nof_frames [Default %d]\n", nof_frames);
printf("\t-N nof_repetitions [Default %d]\n", nof_repetitions);
@ -70,8 +72,11 @@ void usage(char *prog) {
void parse_args(int argc, char **argv) {
int opt;
while ((opt = getopt(argc, argv, "inNlstvekt")) != -1) {
while ((opt = getopt(argc, argv, "cinNlstvekt")) != -1) {
switch (opt) {
case 'c':
nof_cb = atoi(argv[optind]);
break;
case 'n':
nof_frames = atoi(argv[optind]);
break;
@ -112,7 +117,7 @@ int main(int argc, char **argv) {
float *llr;
short *llr_s;
uint8_t *llr_c;
uint8_t *data_tx, *data_rx, *data_rx_bytes, *symbols;
uint8_t *data_tx, *data_rx, *data_rx_bytes[SRSLTE_TDEC_NPAR], *symbols;
uint32_t i, j;
float var[SNR_POINTS];
uint32_t snr_points;
@ -154,10 +159,12 @@ int main(int argc, char **argv) {
perror("malloc");
exit(-1);
}
data_rx_bytes = srslte_vec_malloc(frame_length * sizeof(uint8_t));
if (!data_rx_bytes) {
perror("malloc");
exit(-1);
for (int cb=0;cb<SRSLTE_TDEC_NPAR;cb++) {
data_rx_bytes[cb] = srslte_vec_malloc(frame_length * sizeof(uint8_t));
if (!data_rx_bytes[cb]) {
perror("malloc");
exit(-1);
}
}
symbols = srslte_vec_malloc(coded_length * sizeof(uint8_t));
@ -232,7 +239,6 @@ int main(int argc, char **argv) {
for (j = 0; j < coded_length; j++) {
llr[j] = symbols[j] ? 1 : -1;
}
srslte_ch_awgn_f(llr, llr, var[i], coded_length);
for (j=0;j<coded_length;j++) {
@ -248,23 +254,39 @@ int main(int argc, char **argv) {
t = nof_iterations;
}
int16_t *input[SRSLTE_TDEC_NPAR];
uint8_t *output[SRSLTE_TDEC_NPAR];
for (int n=0;n<SRSLTE_TDEC_NPAR;n++) {
if (n < nof_cb) {
input[n] = llr_s;
} else {
input[n] = NULL;
}
output[n] = data_rx_bytes[n];
}
gettimeofday(&tdata[1], NULL);
for (int k=0;k<nof_repetitions;k++) {
srslte_tdec_run_all(&tdec, llr_s, data_rx_bytes, t, frame_length);
for (int k=0;k<nof_repetitions;k++) {
srslte_tdec_run_all_par(&tdec, input, output, t, frame_length);
}
gettimeofday(&tdata[2], NULL);
get_time_interval(tdata);
mean_usec = (float) mean_usec * 0.9 + (float) (tdata[0].tv_usec/nof_repetitions) * 0.1;
srslte_bit_unpack_vector(data_rx_bytes, data_rx, frame_length);
errors += srslte_bit_diff(data_tx, data_rx, frame_length);
frame_cnt++;
uint32_t errors_this = 0;
for (int cb=0;cb<nof_cb;cb++) {
srslte_bit_unpack_vector(data_rx_bytes[cb], data_rx, frame_length);
errors_this=srslte_bit_diff(data_tx, data_rx, frame_length);
//printf("error[%d]=%d\n", cb, errors_this);
errors += errors_this;
}
printf("Eb/No: %2.2f %10d/%d ", SNR_MIN + i * ebno_inc, frame_cnt, nof_frames);
printf("BER: %.2e ", (float) errors / (frame_cnt * frame_length));
printf("%3.1f Mbps (%6.2f usec)", (float) frame_length / mean_usec, mean_usec);
printf("\r");
printf("BER: %.2e ", (float) errors / (nof_cb*frame_cnt * frame_length));
printf("%3.1f Mbps (%6.2f usec)", (float) (nof_cb*frame_length) / mean_usec, mean_usec);
printf("\r");
}
printf("\n");
@ -273,7 +295,7 @@ int main(int argc, char **argv) {
printf("\n");
if (snr_points == 1) {
if (errors) {
printf("%d Errors\n", errors);
printf("%d Errors\n", errors/nof_cb);
}
}

View File

@ -35,7 +35,7 @@
#ifdef LV_HAVE_SSE
#include "srslte/phy/fec/turbodecoder_sse.h"
#include "srslte/phy/fec/turbodecoder_simd.h"
#endif
#include "srslte/phy/utils/vector.h"
@ -43,7 +43,7 @@
int srslte_tdec_init(srslte_tdec_t * h, uint32_t max_long_cb) {
#ifdef LV_HAVE_SSE
return srslte_tdec_sse_init(&h->tdec_sse, max_long_cb);
return srslte_tdec_simd_init(&h->tdec_simd, SRSLTE_TDEC_NPAR, max_long_cb);
#else
h->input_conv = srslte_vec_malloc(sizeof(float) * (3*max_long_cb+12));
if (!h->input_conv) {
@ -56,7 +56,7 @@ int srslte_tdec_init(srslte_tdec_t * h, uint32_t max_long_cb) {
void srslte_tdec_free(srslte_tdec_t * h) {
#ifdef LV_HAVE_SSE
srslte_tdec_sse_free(&h->tdec_sse);
srslte_tdec_simd_free(&h->tdec_simd);
#else
if (h->input_conv) {
free(h->input_conv);
@ -68,45 +68,99 @@ void srslte_tdec_free(srslte_tdec_t * h) {
int srslte_tdec_reset(srslte_tdec_t * h, uint32_t long_cb) {
#ifdef LV_HAVE_SSE
return srslte_tdec_sse_reset(&h->tdec_sse, long_cb);
return srslte_tdec_simd_reset(&h->tdec_simd, long_cb);
#else
return srslte_tdec_gen_reset(&h->tdec_gen, long_cb);
#endif
}
void srslte_tdec_iteration(srslte_tdec_t * h, int16_t* input, uint32_t long_cb) {
int srslte_tdec_reset_cb(srslte_tdec_t * h, uint32_t cb_idx) {
#ifdef LV_HAVE_SSE
srslte_tdec_sse_iteration(&h->tdec_sse, input, long_cb);
return srslte_tdec_simd_reset_cb(&h->tdec_simd, cb_idx);
#else
srslte_vec_convert_if(input, h->input_conv, 0.01, 3*long_cb+12);
return srslte_tdec_gen_reset(&h->tdec_gen, h->tdec_gen.current_cb_len);
#endif
}
int srslte_tdec_get_nof_iterations_cb(srslte_tdec_t * h, uint32_t cb_idx)
{
#ifdef LV_HAVE_SSE
return srslte_tdec_simd_get_nof_iterations_cb(&h->tdec_simd, cb_idx);
#else
return h->tdec_gen.n_iter;
#endif
}
void srslte_tdec_iteration_par(srslte_tdec_t * h, int16_t* input[SRSLTE_TDEC_NPAR], uint32_t long_cb) {
#ifdef LV_HAVE_SSE
srslte_tdec_simd_iteration(&h->tdec_simd, input, long_cb);
#else
srslte_vec_convert_if(input[0], h->input_conv, 0.01, 3*long_cb+12);
srslte_tdec_gen_iteration(&h->tdec_gen, h->input_conv, long_cb);
#endif
}
void srslte_tdec_decision(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) {
#ifdef LV_HAVE_SSE
return srslte_tdec_sse_decision(&h->tdec_sse, output, long_cb);
#else
return srslte_tdec_gen_decision(&h->tdec_gen, output, long_cb);
#endif
void srslte_tdec_iteration(srslte_tdec_t * h, int16_t* input, uint32_t long_cb) {
int16_t *input_par[SRSLTE_TDEC_NPAR];
input_par[0] = input;
return srslte_tdec_iteration_par(h, input_par, long_cb);
}
void srslte_tdec_decision_par(srslte_tdec_t * h, uint8_t *output[SRSLTE_TDEC_NPAR], uint32_t long_cb) {
#ifdef LV_HAVE_SSE
return srslte_tdec_simd_decision(&h->tdec_simd, output, long_cb);
#else
return srslte_tdec_gen_decision(&h->tdec_gen, output[0], long_cb);
#endif
}
void srslte_tdec_decision(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) {
uint8_t *output_par[SRSLTE_TDEC_NPAR];
output_par[0] = output;
srslte_tdec_decision_par(h, output_par, long_cb);
}
void srslte_tdec_decision_byte_par(srslte_tdec_t * h, uint8_t *output[SRSLTE_TDEC_NPAR], uint32_t long_cb) {
#ifdef LV_HAVE_SSE
srslte_tdec_simd_decision_byte(&h->tdec_simd, output, long_cb);
#else
srslte_tdec_gen_decision_byte(&h->tdec_gen, output[0], long_cb);
#endif
}
void srslte_tdec_decision_byte_par_cb(srslte_tdec_t * h, uint8_t *output, uint32_t cb_idx, uint32_t long_cb) {
#ifdef LV_HAVE_SSE
srslte_tdec_simd_decision_byte_cb(&h->tdec_simd, output, cb_idx, long_cb);
#else
srslte_tdec_gen_decision_byte(&h->tdec_gen, output, long_cb);
#endif
}
void srslte_tdec_decision_byte(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) {
uint8_t *output_par[SRSLTE_TDEC_NPAR];
output_par[0] = output;
srslte_tdec_decision_byte_par(h, output_par, long_cb);
}
int srslte_tdec_run_all_par(srslte_tdec_t * h, int16_t * input[SRSLTE_TDEC_NPAR],
uint8_t *output[SRSLTE_TDEC_NPAR],
uint32_t nof_iterations, uint32_t long_cb) {
#ifdef LV_HAVE_SSE
return srslte_tdec_sse_decision_byte(&h->tdec_sse, output, long_cb);
return srslte_tdec_simd_run_all(&h->tdec_simd, input, output, nof_iterations, long_cb);
#else
return srslte_tdec_gen_decision_byte(&h->tdec_gen, output, long_cb);
srslte_vec_convert_if(input[0], h->input_conv, 0.01, 3*long_cb+12);
return srslte_tdec_gen_run_all(&h->tdec_gen, h->input_conv, output[0], nof_iterations, long_cb);
#endif
}
int srslte_tdec_run_all(srslte_tdec_t * h, int16_t * input, uint8_t *output, uint32_t nof_iterations, uint32_t long_cb)
{
#ifdef LV_HAVE_SSE
return srslte_tdec_sse_run_all(&h->tdec_sse, input, output, nof_iterations, long_cb);
#else
srslte_vec_convert_if(input, h->input_conv, 0.01, 3*long_cb+12);
return srslte_tdec_gen_run_all(&h->tdec_gen, h->input_conv, output, nof_iterations, long_cb);
#endif
uint8_t *output_par[SRSLTE_TDEC_NPAR];
output_par[0] = output;
int16_t *input_par[SRSLTE_TDEC_NPAR];
input_par[0] = input;
return srslte_tdec_run_all_par(h, input_par, output_par, nof_iterations, long_cb);
}

View File

@ -0,0 +1,475 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 Software Radio Systems Limited
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <math.h>
#include "srslte/phy/fec/turbodecoder_simd.h"
#include "srslte/phy/utils/vector.h"
#include <inttypes.h>
#define NUMSTATES 8
#define NINPUTS 2
#define TAIL 3
#define TOTALTAIL 12
#define INF 10000
#define ZERO 0
#ifdef LV_HAVE_AVX2
#include <smmintrin.h>
#include <immintrin.h>
// Number of CB processed in parllel in AVX
#define NCB 2
/*
static void print_256i(__m256i x) {
int16_t *s = (int16_t*) &x;
printf("[%d", s[0]);
for (int i=1;i<16;i++) {
printf(",%d", s[i]);
}
printf("]\n");
}
*/
/* Computes the horizontal MAX from 8 16-bit integers using the minpos_epu16 SSE4.1 instruction */
static inline int16_t hMax0(__m256i masked_value)
{
__m128i tmp1 = _mm256_extractf128_si256(masked_value, 0);
__m128i tmp3 = _mm_minpos_epu16(tmp1);
return (int16_t)(_mm_cvtsi128_si32(tmp3));
}
static inline int16_t hMax1(__m256i masked_value)
{
__m128i tmp1 = _mm256_extractf128_si256(masked_value, 1);
__m128i tmp3 = _mm_minpos_epu16(tmp1);
return (int16_t)(_mm_cvtsi128_si32(tmp3));
}
/* Computes beta values */
void map_avx_beta(map_gen_t * s, int16_t * output[SRSLTE_TDEC_NPAR], uint32_t long_cb)
{
int k;
uint32_t end = long_cb + 3;
const __m256i *alphaPtr = (const __m256i*) s->alpha;
__m256i beta_k = _mm256_set_epi16(-INF, -INF, -INF, -INF, -INF, -INF, -INF, 0, -INF, -INF, -INF, -INF, -INF, -INF, -INF, 0);
__m256i g, bp, bn, alpha_k;
/* Define the shuffle constant for the positive beta */
__m256i shuf_bp = _mm256_set_epi8(
// 1st CB
15+16, 14+16, // 7
7+16, 6+16, // 3
5+16, 4+16, // 2
13+16, 12+16, // 6
11+16, 10+16, // 5
3+16, 2+16, // 1
1+16, 0+16, // 0
9+16, 8+16, // 4
// 2nd CB
15, 14, // 7
7, 6, // 3
5, 4, // 2
13, 12, // 6
11, 10, // 5
3, 2, // 1
1, 0, // 0
9, 8 // 4
);
/* Define the shuffle constant for the negative beta */
__m256i shuf_bn = _mm256_set_epi8(
7+16, 6+16, // 3
15+16, 14+16, // 7
13+16, 12+16, // 6
5+16, 4+16, // 2
3+16, 2+16, // 1
11+16, 10+16, // 5
9+16, 8+16, // 4
1+16, 0+16, // 0
7, 6, // 3
15, 14, // 7
13, 12, // 6
5, 4, // 2
3, 2, // 1
11, 10, // 5
9, 8, // 4
1, 0 // 0
);
alphaPtr += long_cb-1;
/* Define shuffle for branch costs */
__m256i shuf_g[4];
shuf_g[3] = _mm256_set_epi8(3+16,2+16,1+16,0+16,1+16,0+16,3+16,2+16,3+16,2+16,1+16,0+16,1+16,0+16,3+16,2+16,
3,2,1,0,1,0,3,2,3,2,1,0,1,0,3,2);
shuf_g[2] = _mm256_set_epi8(7+16,6+16,5+16,4+16,5+16,4+16,7+16,6+16,7+16,6+16,5+16,4+16,5+16,4+16,7+16,6+16,
7,6,5,4,5,4,7,6,7,6,5,4,5,4,7,6);
shuf_g[1] = _mm256_set_epi8(11+16,10+16,9+16,8+16,9+16,8+16,11+16,10+16,11+16,10+16,9+16,8+16,9+16,8+16,11+16,10+16,
11,10,9,8,9,8,11,10,11,10,9,8,9,8,11,10);
shuf_g[0] = _mm256_set_epi8(15+16,14+16,13+16,12+16,13+16,12+16,15+16,14+16,15+16,14+16,13+16,12+16,13+16,12+16,15+16,14+16,
15,14,13,12,13,12,15,14,15,14,13,12,13,12,15,14);
/* Define shuffle for beta normalization */
__m256i shuf_norm = _mm256_set_epi8(17,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0);
__m256i gv;
int16_t *b = &s->branch[2*NCB*long_cb-16];
__m256i *gPtr = (__m256i*) b;
/* This defines a beta computation step:
* Adds and substracts the branch metrics to the previous beta step,
* shuffles the states according to the trellis path and selects maximum state
*/
#define BETA_STEP(g) bp = _mm256_add_epi16(beta_k, g);\
bn = _mm256_sub_epi16(beta_k, g);\
bp = _mm256_shuffle_epi8(bp, shuf_bp);\
bn = _mm256_shuffle_epi8(bn, shuf_bn);\
beta_k = _mm256_max_epi16(bp, bn);
/* Loads the alpha metrics from memory and adds them to the temporal bn and bp
* metrics. Then computes horizontal maximum of both metrics and computes difference
*/
#define BETA_STEP_CNT(c,d) g = _mm256_shuffle_epi8(gv, shuf_g[c]);\
BETA_STEP(g)\
alpha_k = _mm256_load_si256(alphaPtr);\
alphaPtr--;\
bp = _mm256_add_epi16(bp, alpha_k);\
bn = _mm256_add_epi16(bn, alpha_k);\
bn = _mm256_sub_epi16(_mm256_set1_epi16(0x7FFF), bn);\
bp = _mm256_sub_epi16(_mm256_set1_epi16(0x7FFF), bp);\
output[0][k-d] = hMax0(bn) - hMax0(bp);\
output[1][k-d] = hMax1(bn) - hMax1(bp);
/* The tail does not require to load alpha or produce outputs. Only update
* beta metrics accordingly */
for (k=end-1; k>=long_cb; k--) {
int16_t g0_1 = s->branch[2*NCB*k];
int16_t g1_1 = s->branch[2*NCB*k+1];
int16_t g0_2 = s->branch[2*NCB*k+6];
int16_t g1_2 = s->branch[2*NCB*k+6+1];
g = _mm256_set_epi16(g1_2, g0_2, g0_2, g1_2, g1_2, g0_2, g0_2, g1_2, g1_1, g0_1, g0_1, g1_1, g1_1, g0_1, g0_1, g1_1);
BETA_STEP(g);
}
/* We inline 2 trelis steps for each normalization */
__m256i norm;
for (; k >= 0; k-=8) {
gv = _mm256_load_si256(gPtr);
gPtr--;
BETA_STEP_CNT(0,0);
BETA_STEP_CNT(1,1);
BETA_STEP_CNT(2,2);
BETA_STEP_CNT(3,3);
norm = _mm256_shuffle_epi8(beta_k, shuf_norm);
beta_k = _mm256_sub_epi16(beta_k, norm);
gv = _mm256_load_si256(gPtr);
gPtr--;
BETA_STEP_CNT(0,4);
BETA_STEP_CNT(1,5);
BETA_STEP_CNT(2,6);
BETA_STEP_CNT(3,7);
norm = _mm256_shuffle_epi8(beta_k, shuf_norm);
beta_k = _mm256_sub_epi16(beta_k, norm);
}
}
/* Computes alpha metrics */
void map_avx_alpha(map_gen_t * s, uint32_t long_cb)
{
uint32_t k;
int16_t *alpha1 = s->alpha;
int16_t *alpha2 = &s->alpha[8];
uint32_t i;
alpha1[0] = 0;
alpha2[0] = 0;
for (i = 1; i < 8; i++) {
alpha1[i] = -INF;
alpha2[i] = -INF;
}
/* Define the shuffle constant for the positive alpha */
__m256i shuf_ap = _mm256_set_epi8(
// 1st CB
31, 30, // 7
25, 24, // 4
23, 22, // 3
17, 16, // 0
29, 28, // 6
27, 26, // 5
21, 20, // 2
19, 18, // 1
// 2nd CB
15, 14, // 7
9, 8, // 4
7, 6, // 3
1, 0, // 0
13, 12, // 6
11, 10, // 5
5, 4, // 2
3, 2 // 1
);
/* Define the shuffle constant for the negative alpha */
__m256i shuf_an = _mm256_set_epi8(
// 1nd CB
29, 28, // 6
27, 26, // 5
21, 20, // 2
19, 18, // 1
31, 30, // 7
25, 24, // 4
23, 22, // 3
17, 16, // 0
// 2nd CB
13, 12, // 6
11, 10, // 5
5, 4, // 2
3, 2, // 1
15, 14, // 7
9, 8, // 4
7, 6, // 3
1, 0 // 0
);
/* Define shuffle for branch costs */
__m256i shuf_g[4];
shuf_g[0] = _mm256_set_epi8(3+16,2+16,3+16,2+16,1+16,0+16,1+16,0+16,1+16,0+16,1+16,0+16,3+16,2+16,3+16,2+16,
3,2,3,2,1,0,1,0,1,0,1,0,3,2,3,2);
shuf_g[1] = _mm256_set_epi8(7+16,6+16,7+16,6+16,5+16,4+16,5+16,4+16,5+16,4+16,5+16,4+16,7+16,6+16,7+16,6+16,
7,6,7,6,5,4,5,4,5,4,5,4,7,6,7,6);
shuf_g[2] = _mm256_set_epi8(11+16,10+16,11+16,10+16,9+16,8+16,9+16,8+16,9+16,8+16,9+16,8+16,11+16,10+16,11+16,10+16,
11,10,11,10,9,8,9,8,9,8,9,8,11,10,11,10);
shuf_g[3] = _mm256_set_epi8(15+16,14+16,15+16,14+16,13+16,12+16,13+16,12+16,13+16,12+16,13+16,12+16,15+16,14+16,15+16,14+16,
15,14,15,14,13,12,13,12,13,12,13,12,15,14,15,14);
__m256i shuf_norm = _mm256_set_epi8(17,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0);
__m256i* alphaPtr = (__m256i*) s->alpha;
alphaPtr++;
__m256i gv;
__m256i *gPtr = (__m256i*) s->branch;
__m256i g, ap, an;
__m256i alpha_k = _mm256_set_epi16(-INF, -INF, -INF, -INF, -INF, -INF, -INF, 0, -INF, -INF, -INF, -INF, -INF, -INF, -INF, 0);
/* This defines a alpha computation step:
* Adds and substracts the branch metrics to the previous alpha step,
* shuffles the states according to the trellis path and selects maximum state
*/
#define ALPHA_STEP(c) g = _mm256_shuffle_epi8(gv, shuf_g[c]); \
ap = _mm256_add_epi16(alpha_k, g);\
an = _mm256_sub_epi16(alpha_k, g);\
ap = _mm256_shuffle_epi8(ap, shuf_ap);\
an = _mm256_shuffle_epi8(an, shuf_an);\
alpha_k = _mm256_max_epi16(ap, an);\
_mm256_store_si256(alphaPtr, alpha_k);\
alphaPtr++;\
/* In this loop, we compute 8 steps and normalize twice for each branch metrics memory load */
__m256i norm;
for (k = 0; k < long_cb/8; k++) {
gv = _mm256_load_si256(gPtr);
gPtr++;
ALPHA_STEP(0);
ALPHA_STEP(1);
ALPHA_STEP(2);
ALPHA_STEP(3);
norm = _mm256_shuffle_epi8(alpha_k, shuf_norm);
alpha_k = _mm256_sub_epi16(alpha_k, norm);
gv = _mm256_load_si256(gPtr);
gPtr++;
ALPHA_STEP(0);
ALPHA_STEP(1);
ALPHA_STEP(2);
ALPHA_STEP(3);
norm = _mm256_shuffle_epi8(alpha_k, shuf_norm);
alpha_k = _mm256_sub_epi16(alpha_k, norm);
}
}
void map_sse_gamma_single(int16_t *output, int16_t *input, int16_t *app, int16_t *parity)
{
__m128i res00, res10, res01, res11, res0, res1;
__m128i in, ap, pa, g1, g0;
__m128i *inPtr = (__m128i*) input;
__m128i *appPtr = (__m128i*) app;
__m128i *paPtr = (__m128i*) parity;
__m128i *resPtr = (__m128i*) output;
__m128i res00_mask = _mm_set_epi8(0xff,0xff,7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0);
__m128i res10_mask = _mm_set_epi8(0xff,0xff,15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8);
__m128i res01_mask = _mm_set_epi8(7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0,0xff,0xff);
__m128i res11_mask = _mm_set_epi8(15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8,0xff,0xff);
in = _mm_load_si128(inPtr);
inPtr++;
pa = _mm_load_si128(paPtr);
paPtr++;
if (appPtr) {
ap = _mm_load_si128(appPtr);
appPtr++;
in = _mm_add_epi16(ap, in);
}
g1 = _mm_add_epi16(in, pa);
g0 = _mm_sub_epi16(in, pa);
g1 = _mm_srai_epi16(g1, 1);
g0 = _mm_srai_epi16(g0, 1);
res00 = _mm_shuffle_epi8(g0, res00_mask);
res10 = _mm_shuffle_epi8(g0, res10_mask);
res01 = _mm_shuffle_epi8(g1, res01_mask);
res11 = _mm_shuffle_epi8(g1, res11_mask);
res0 = _mm_or_si128(res00, res01);
res1 = _mm_or_si128(res10, res11);
_mm_store_si128(resPtr, res0);
resPtr++;
_mm_store_si128(resPtr, res1);
resPtr++;
}
/* Compute branch metrics (gamma) */
void map_avx_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t cbidx, uint32_t long_cb)
{
__m128i res10, res20, res11, res21, res1, res2;
__m256i in, ap, pa, g1, g0;
__m256i *inPtr = (__m256i*) input;
__m256i *appPtr = (__m256i*) app;
__m256i *paPtr = (__m256i*) parity;
__m128i *resPtr = (__m128i*) h->branch;
if (cbidx) {
resPtr++;
}
__m128i res10_mask = _mm_set_epi8(0xff,0xff,7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0);
__m128i res11_mask = _mm_set_epi8(7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0,0xff,0xff);
__m128i res20_mask = _mm_set_epi8(0xff,0xff,15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8);
__m128i res21_mask = _mm_set_epi8(15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8,0xff,0xff);
for (int i=0;i<long_cb/16;i++) {
in = _mm256_load_si256(inPtr);
inPtr++;
pa = _mm256_load_si256(paPtr);
paPtr++;
if (appPtr) {
ap = _mm256_load_si256(appPtr);
appPtr++;
in = _mm256_add_epi16(ap, in);
}
g0 = _mm256_sub_epi16(in, pa);
g1 = _mm256_add_epi16(in, pa);
g0 = _mm256_srai_epi16(g0, 1);
g1 = _mm256_srai_epi16(g1, 1);
__m128i g0_t = _mm256_extractf128_si256(g0, 0);
__m128i g1_t = _mm256_extractf128_si256(g1, 0);
res10 = _mm_shuffle_epi8(g0_t, res10_mask);
res11 = _mm_shuffle_epi8(g1_t, res11_mask);
res20 = _mm_shuffle_epi8(g0_t, res20_mask);
res21 = _mm_shuffle_epi8(g1_t, res21_mask);
res1 = _mm_or_si128(res10, res11);
res2 = _mm_or_si128(res20, res21);
_mm_store_si128(resPtr, res1);
resPtr++;
resPtr++;
_mm_store_si128(resPtr, res2);
resPtr++;
resPtr++;
g0_t = _mm256_extractf128_si256(g0, 1);
g1_t = _mm256_extractf128_si256(g1, 1);
res10 = _mm_shuffle_epi8(g0_t, res10_mask);
res11 = _mm_shuffle_epi8(g1_t, res11_mask);
res20 = _mm_shuffle_epi8(g0_t, res20_mask);
res21 = _mm_shuffle_epi8(g1_t, res21_mask);
res1 = _mm_or_si128(res10, res11);
res2 = _mm_or_si128(res20, res21);
_mm_store_si128(resPtr, res1);
resPtr++;
resPtr++;
_mm_store_si128(resPtr, res2);
resPtr++;
resPtr++;
}
if (long_cb%16) {
map_sse_gamma_single((int16_t*) resPtr, (int16_t*) inPtr, (int16_t*) appPtr, (int16_t*) paPtr);
}
for (int i=long_cb;i<long_cb+3;i++) {
h->branch[2*i*NCB+cbidx*6] = (input[i] - parity[i])/2;
h->branch[2*i*NCB+cbidx*6+1] = (input[i] + parity[i])/2;
}
}
#endif

View File

@ -0,0 +1,540 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 Software Radio Systems Limited
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <math.h>
#include "srslte/phy/fec/turbodecoder_simd.h"
#include "srslte/phy/utils/vector.h"
#include <inttypes.h>
#define NUMSTATES 8
#define NINPUTS 2
#define TAIL 3
#define TOTALTAIL 12
#define INF 10000
#define ZERO 0
#ifdef LV_HAVE_SSE
#include <smmintrin.h>
// Define SSE/AVX implementations
void map_sse_beta(map_gen_t * s, int16_t * output, uint32_t long_cb);
void map_sse_alpha(map_gen_t * s, uint32_t long_cb);
void map_sse_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t long_cb);
#ifdef LV_HAVE_AVX2
void map_avx_beta(map_gen_t * s, int16_t * output[SRSLTE_TDEC_NPAR], uint32_t long_cb);
void map_avx_alpha(map_gen_t * s, uint32_t long_cb);
void map_avx_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t cbidx, uint32_t long_cb);
#endif
void map_simd_beta(map_gen_t * s, int16_t * output[SRSLTE_TDEC_NPAR], uint32_t nof_cb, uint32_t long_cb)
{
if (nof_cb == 1) {
map_sse_beta(s, output[0], long_cb);
}
#ifdef LV_HAVE_AVX2
else if (nof_cb == 2) {
map_avx_beta(s, output, long_cb);
}
#endif
}
void map_simd_alpha(map_gen_t * s, uint32_t nof_cb, uint32_t long_cb)
{
if (nof_cb == 1) {
map_sse_alpha(s, long_cb);
}
#ifdef LV_HAVE_AVX2
else if (nof_cb == 2) {
map_avx_alpha(s, long_cb);
}
#endif
}
void map_simd_gamma(map_gen_t * s, int16_t *input, int16_t *app, int16_t *parity, uint32_t cbidx, uint32_t nof_cb, uint32_t long_cb)
{
if (nof_cb == 1) {
map_sse_gamma(s, input, app, parity, long_cb);
}
#ifdef LV_HAVE_AVX2
else if (nof_cb == 2) {
map_avx_gamma(s, input, app, parity, cbidx, long_cb);
}
#endif
}
/* Inititalizes constituent decoder object */
int map_simd_init(map_gen_t * h, uint32_t max_par_cb, uint32_t max_long_cb)
{
bzero(h, sizeof(map_gen_t));
h->max_par_cb = max_par_cb;
h->max_long_cb = max_long_cb;
h->alpha = srslte_vec_malloc(sizeof(int16_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES * h->max_par_cb);
if (!h->alpha) {
perror("srslte_vec_malloc");
return -1;
}
h->branch = srslte_vec_malloc(sizeof(int16_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES * h->max_par_cb);
if (!h->branch) {
perror("srslte_vec_malloc");
return -1;
}
return 0;
}
void map_simd_free(map_gen_t * h)
{
if (h->alpha) {
free(h->alpha);
}
if (h->branch) {
free(h->branch);
}
bzero(h, sizeof(map_gen_t));
}
/* Runs one instance of a decoder */
void map_simd_dec(map_gen_t * h, int16_t * input[SRSLTE_TDEC_NPAR], int16_t *app[SRSLTE_TDEC_NPAR], int16_t * parity[SRSLTE_TDEC_NPAR],
int16_t *output[SRSLTE_TDEC_NPAR], uint32_t cb_mask, uint32_t long_cb)
{
uint32_t nof_cb = 1;
int16_t *outptr[SRSLTE_TDEC_NPAR];
// Compute branch metrics
switch(cb_mask) {
case 1:
nof_cb = 1;
outptr[0] = output[0];
map_simd_gamma(h, input[0], app?app[0]:NULL, parity[0], 0, 1, long_cb);
break;
case 2:
nof_cb = 1;
outptr[0] = output[1];
map_simd_gamma(h, input[1], app?app[1]:NULL, parity[1], 0, 1, long_cb);
break;
case 3:
nof_cb = 2;
for (int i=0;i<2;i++) {
outptr[i] = output[i];
map_simd_gamma(h, input[i], app?app[i]:NULL, parity[i], i, 2, long_cb);
}
break;
}
// Forward recursion
map_simd_alpha(h, nof_cb, long_cb);
// Backwards recursion + LLR computation
map_simd_beta(h, outptr, nof_cb, long_cb);
}
/* Initializes the turbo decoder object */
int srslte_tdec_simd_init(srslte_tdec_simd_t * h, uint32_t max_par_cb, uint32_t max_long_cb)
{
int ret = -1;
bzero(h, sizeof(srslte_tdec_simd_t));
uint32_t len = max_long_cb + SRSLTE_TCOD_TOTALTAIL;
h->max_long_cb = max_long_cb;
h->max_par_cb = max_par_cb;
for (int i=0;i<h->max_par_cb;i++) {
h->app1[i] = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->app1[i]) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->app2[i] = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->app2[i]) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->ext1[i] = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->ext1[i]) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->ext2[i] = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->ext2[i]) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->syst[i] = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->syst[i]) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->parity0[i] = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->parity0[i]) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->parity1[i] = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->parity1[i]) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
}
if (map_simd_init(&h->dec, h->max_par_cb, h->max_long_cb)) {
goto clean_and_exit;
}
for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
if (srslte_tc_interl_init(&h->interleaver[i], srslte_cbsegm_cbsize(i)) < 0) {
goto clean_and_exit;
}
srslte_tc_interl_LTE_gen(&h->interleaver[i], srslte_cbsegm_cbsize(i));
}
h->current_cbidx = -1;
h->cb_mask = 0;
ret = 0;
clean_and_exit:if (ret == -1) {
srslte_tdec_simd_free(h);
}
return ret;
}
void srslte_tdec_simd_free(srslte_tdec_simd_t * h)
{
for (int i=0;i<h->max_par_cb;i++) {
if (h->app1[i]) {
free(h->app1[i]);
}
if (h->app2[i]) {
free(h->app2[i]);
}
if (h->ext1[i]) {
free(h->ext1[i]);
}
if (h->ext2[i]) {
free(h->ext2[i]);
}
if (h->syst[i]) {
free(h->syst[i]);
}
if (h->parity0[i]) {
free(h->parity0[i]);
}
if (h->parity1[i]) {
free(h->parity1[i]);
}
}
map_simd_free(&h->dec);
for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
srslte_tc_interl_free(&h->interleaver[i]);
}
bzero(h, sizeof(srslte_tdec_simd_t));
}
/* Deinterleaves the 3 streams from the input (systematic and 2 parity bits) into
* 3 buffers ready to be used by compute_gamma()
*/
void deinterleave_input_simd(srslte_tdec_simd_t *h, int16_t *input, uint32_t cbidx, uint32_t long_cb) {
uint32_t i;
__m128i *inputPtr = (__m128i*) input;
__m128i in0, in1, in2;
__m128i s0, s1, s2, s;
__m128i p00, p01, p02, p0;
__m128i p10, p11, p12, p1;
__m128i *sysPtr = (__m128i*) h->syst[cbidx];
__m128i *pa0Ptr = (__m128i*) h->parity0[cbidx];
__m128i *pa1Ptr = (__m128i*) h->parity1[cbidx];
// pick bits 0, 3, 6 from 1st word
__m128i s0_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,13,12,7,6,1,0);
// pick bits 1, 4, 7 from 2st word
__m128i s1_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,15,14,9,8,3,2,0xff,0xff,0xff,0xff,0xff,0xff);
// pick bits 2, 5 from 3rd word
__m128i s2_mask = _mm_set_epi8(11,10,5,4,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
// pick bits 1, 4, 7 from 1st word
__m128i p00_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,15,14,9,8,3,2);
// pick bits 2, 5, from 2st word
__m128i p01_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,11,10,5,4,0xff,0xff,0xff,0xff,0xff,0xff);
// pick bits 0, 3, 6 from 3rd word
__m128i p02_mask = _mm_set_epi8(13,12,7,6,1,0,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
// pick bits 2, 5 from 1st word
__m128i p10_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,11,10,5,4);
// pick bits 0, 3, 6, from 2st word
__m128i p11_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,13,12,7,6,1,0,0xff,0xff,0xff,0xff);
// pick bits 1, 4, 7 from 3rd word
__m128i p12_mask = _mm_set_epi8(15,14,9,8,3,2,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
// Split systematic and parity bits
for (i = 0; i < long_cb/8; i++) {
in0 = _mm_load_si128(inputPtr); inputPtr++;
in1 = _mm_load_si128(inputPtr); inputPtr++;
in2 = _mm_load_si128(inputPtr); inputPtr++;
/* Deinterleave Systematic bits */
s0 = _mm_shuffle_epi8(in0, s0_mask);
s1 = _mm_shuffle_epi8(in1, s1_mask);
s2 = _mm_shuffle_epi8(in2, s2_mask);
s = _mm_or_si128(s0, s1);
s = _mm_or_si128(s, s2);
_mm_store_si128(sysPtr, s);
sysPtr++;
/* Deinterleave parity 0 bits */
p00 = _mm_shuffle_epi8(in0, p00_mask);
p01 = _mm_shuffle_epi8(in1, p01_mask);
p02 = _mm_shuffle_epi8(in2, p02_mask);
p0 = _mm_or_si128(p00, p01);
p0 = _mm_or_si128(p0, p02);
_mm_store_si128(pa0Ptr, p0);
pa0Ptr++;
/* Deinterleave parity 1 bits */
p10 = _mm_shuffle_epi8(in0, p10_mask);
p11 = _mm_shuffle_epi8(in1, p11_mask);
p12 = _mm_shuffle_epi8(in2, p12_mask);
p1 = _mm_or_si128(p10, p11);
p1 = _mm_or_si128(p1, p12);
_mm_store_si128(pa1Ptr, p1);
pa1Ptr++;
}
for (i = 0; i < 3; i++) {
h->syst[cbidx][i+long_cb] = input[3*long_cb + 2*i];
h->parity0[cbidx][i+long_cb] = input[3*long_cb + 2*i + 1];
}
for (i = 0; i < 3; i++) {
h->app2[cbidx][i+long_cb] = input[3*long_cb + 6 + 2*i];
h->parity1[cbidx][i+long_cb] = input[3*long_cb + 6 + 2*i + 1];
}
}
/* Runs 1 turbo decoder iteration */
void srslte_tdec_simd_iteration(srslte_tdec_simd_t * h, int16_t * input[SRSLTE_TDEC_NPAR], uint32_t long_cb)
{
int16_t *tmp_app[SRSLTE_TDEC_NPAR];
if (h->current_cbidx >= 0) {
uint16_t *inter = h->interleaver[h->current_cbidx].forward;
uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
#if SRSLTE_TDEC_NPAR == 2
h->cb_mask = (input[0]?1:0) | (input[1]?2:0);
#else
h->cb_mask = input[0]?1:0;
#endif
for (int i=0;i<h->max_par_cb;i++) {
if (h->n_iter[i] == 0 && input[i]) {
//printf("deinterleaveing %d\n",i);
deinterleave_input_simd(h, input[i], i, long_cb);
}
}
// Add apriori information to decoder 1
for (int i=0;i<h->max_par_cb;i++) {
if (h->n_iter[i] > 0 && input[i]) {
srslte_vec_sub_sss(h->app1[i], h->ext1[i], h->app1[i], long_cb);
}
}
// Run MAP DEC #1
for (int i=0;i<h->max_par_cb;i++) {
if (input[i]) {
tmp_app[i] = h->n_iter[i]?h->app1[i]:NULL;
} else {
tmp_app[i] = NULL;
}
}
map_simd_dec(&h->dec, h->syst, tmp_app, h->parity0, h->ext1, h->cb_mask, long_cb);
// Convert aposteriori information into extrinsic information
for (int i=0;i<h->max_par_cb;i++) {
if (h->n_iter[i] > 0 && input[i]) {
srslte_vec_sub_sss(h->ext1[i], h->app1[i], h->ext1[i], long_cb);
}
}
// Interleave extrinsic output of DEC1 to form apriori info for decoder 2
for (int i=0;i<h->max_par_cb;i++) {
if (input[i]) {
srslte_vec_lut_sss(h->ext1[i], deinter, h->app2[i], long_cb);
}
}
// Run MAP DEC #2. 2nd decoder uses apriori information as systematic bits
map_simd_dec(&h->dec, h->app2, NULL, h->parity1, h->ext2, h->cb_mask, long_cb);
// Deinterleaved extrinsic bits become apriori info for decoder 1
for (int i=0;i<h->max_par_cb;i++) {
if (input[i]) {
srslte_vec_lut_sss(h->ext2[i], inter, h->app1[i], long_cb);
}
}
for (int i=0;i<h->max_par_cb;i++) {
if (input[i]) {
h->n_iter[i]++;
}
}
} else {
fprintf(stderr, "Error CB index not set (call srslte_tdec_simd_reset() first\n");
}
}
/* Resets the decoder and sets the codeblock length */
int srslte_tdec_simd_reset(srslte_tdec_simd_t * h, uint32_t long_cb)
{
if (long_cb > h->max_long_cb) {
fprintf(stderr, "TDEC was initialized for max_long_cb=%d\n",
h->max_long_cb);
return -1;
}
for (int i=0;i<h->max_par_cb;i++) {
h->n_iter[i] = 0;
}
h->cb_mask = 0;
h->current_cbidx = srslte_cbsegm_cbindex(long_cb);
if (h->current_cbidx < 0) {
fprintf(stderr, "Invalid CB length %d\n", long_cb);
return -1;
}
return 0;
}
int srslte_tdec_simd_reset_cb(srslte_tdec_simd_t * h, uint32_t cb_idx)
{
h->n_iter[cb_idx] = 0;
return 0;
}
int srslte_tdec_simd_get_nof_iterations_cb(srslte_tdec_simd_t * h, uint32_t cb_idx)
{
return h->n_iter[cb_idx];
}
void tdec_simd_decision(srslte_tdec_simd_t * h, uint8_t *output, uint32_t cbidx, uint32_t long_cb)
{
__m128i zero = _mm_set1_epi16(0);
__m128i lsb_mask = _mm_set1_epi16(1);
__m128i *appPtr = (__m128i*) h->app1[cbidx];
__m128i *outPtr = (__m128i*) output;
__m128i ap, out, out0, out1;
for (uint32_t i = 0; i < long_cb/16; i++) {
ap = _mm_load_si128(appPtr); appPtr++;
out0 = _mm_and_si128(_mm_cmpgt_epi16(ap, zero), lsb_mask);
ap = _mm_load_si128(appPtr); appPtr++;
out1 = _mm_and_si128(_mm_cmpgt_epi16(ap, zero), lsb_mask);
out = _mm_packs_epi16(out0, out1);
_mm_store_si128(outPtr, out);
outPtr++;
}
if (long_cb%16) {
for (int i=0;i<8;i++) {
output[long_cb-8+i] = h->app1[cbidx][long_cb-8+i]>0?1:0;
}
}
}
void srslte_tdec_simd_decision(srslte_tdec_simd_t * h, uint8_t *output[SRSLTE_TDEC_NPAR], uint32_t long_cb)
{
for (int i=0;i<h->max_par_cb;i++) {
tdec_simd_decision(h, output[i], i, long_cb);
}
}
void srslte_tdec_simd_decision_byte_cb(srslte_tdec_simd_t * h, uint8_t *output, uint32_t cbidx, uint32_t long_cb)
{
uint8_t mask[8] = {0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1};
// long_cb is always byte aligned
for (uint32_t i = 0; i < long_cb/8; i++) {
uint8_t out0 = h->app1[cbidx][8*i+0]>0?mask[0]:0;
uint8_t out1 = h->app1[cbidx][8*i+1]>0?mask[1]:0;
uint8_t out2 = h->app1[cbidx][8*i+2]>0?mask[2]:0;
uint8_t out3 = h->app1[cbidx][8*i+3]>0?mask[3]:0;
uint8_t out4 = h->app1[cbidx][8*i+4]>0?mask[4]:0;
uint8_t out5 = h->app1[cbidx][8*i+5]>0?mask[5]:0;
uint8_t out6 = h->app1[cbidx][8*i+6]>0?mask[6]:0;
uint8_t out7 = h->app1[cbidx][8*i+7]>0?mask[7]:0;
output[i] = out0 | out1 | out2 | out3 | out4 | out5 | out6 | out7;
}
}
void srslte_tdec_simd_decision_byte(srslte_tdec_simd_t * h, uint8_t *output[SRSLTE_TDEC_NPAR], uint32_t long_cb)
{
for (int i=0;i<h->max_par_cb;i++) {
srslte_tdec_simd_decision_byte_cb(h, output[i], i, long_cb);
}
}
/* Runs nof_iterations iterations and decides the output bits */
int srslte_tdec_simd_run_all(srslte_tdec_simd_t * h, int16_t * input[SRSLTE_TDEC_NPAR], uint8_t *output[SRSLTE_TDEC_NPAR],
uint32_t nof_iterations, uint32_t long_cb)
{
if (srslte_tdec_simd_reset(h, long_cb)) {
return SRSLTE_ERROR;
}
do {
srslte_tdec_simd_iteration(h, input, long_cb);
} while (h->n_iter[0] < nof_iterations);
srslte_tdec_simd_decision_byte(h, output, long_cb);
return SRSLTE_SUCCESS;
}
#endif

View File

@ -0,0 +1,299 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 Software Radio Systems Limited
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <math.h>
#include "srslte/phy/fec/turbodecoder_simd_inter.h"
#include "srslte/phy/utils/vector.h"
#define TOTALTAIL 12
#ifdef LV_HAVE_SSE
#include <smmintrin.h>
void map_see_inter_alpha(srslte_tdec_simd_inter_t * s, int16_t *input, int16_t *parity, uint32_t long_cb);
void map_sse_inter_beta(srslte_tdec_simd_inter_t * s, int16_t *input, int16_t *parity, int16_t * output, uint32_t long_cb);
void sse_inter_update_w(srslte_tdec_simd_inter_t *h, uint16_t *deinter, uint32_t long_cb);
void sse_inter_extract_syst1(srslte_tdec_simd_inter_t *h, uint16_t *inter, uint32_t long_cb);
static void map_sse_inter_dec(srslte_tdec_simd_inter_t * h, int16_t * input, int16_t * parity, int16_t * output,
uint32_t long_cb)
{
map_see_inter_alpha(h, input, parity, long_cb);
map_sse_inter_beta(h, input, parity, output, long_cb);
}
/************************************************
*
* TURBO DECODER INTERFACE
*
************************************************/
int srslte_tdec_simd_inter_init(srslte_tdec_simd_inter_t * h, uint32_t max_par_cb, uint32_t max_long_cb)
{
int ret = -1;
bzero(h, sizeof(srslte_tdec_simd_inter_t));
uint32_t len = max_long_cb + 12;
h->max_long_cb = max_long_cb;
h->max_par_cb = max_par_cb;
h->llr1 = srslte_vec_malloc(sizeof(int16_t) * len * h->max_par_cb);
if (!h->llr1) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->llr2 = srslte_vec_malloc(sizeof(int16_t) * len * h->max_par_cb);
if (!h->llr2) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->w = srslte_vec_malloc(sizeof(int16_t) * len * h->max_par_cb);
if (!h->w) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->syst0 = srslte_vec_malloc(sizeof(int16_t) * len * h->max_par_cb);
if (!h->syst0) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->syst1 = srslte_vec_malloc(sizeof(int16_t) * len * h->max_par_cb);
if (!h->syst1) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->parity0 = srslte_vec_malloc(sizeof(int16_t) * len * h->max_par_cb);
if (!h->parity0) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->parity1 = srslte_vec_malloc(sizeof(int16_t) * len * h->max_par_cb);
if (!h->parity1) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->alpha = srslte_vec_malloc(sizeof(int16_t) * 8*(len+12) * h->max_par_cb);
if (!h->alpha) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
if (srslte_tc_interl_init(&h->interleaver[i], srslte_cbsegm_cbsize(i)) < 0) {
goto clean_and_exit;
}
srslte_tc_interl_LTE_gen(&h->interleaver[i], srslte_cbsegm_cbsize(i));
}
h->current_cbidx = -1;
ret = 0;
clean_and_exit:if (ret == -1) {
srslte_tdec_simd_inter_free(h);
}
return ret;
}
void srslte_tdec_simd_inter_free(srslte_tdec_simd_inter_t * h)
{
if (h->llr1) {
free(h->llr1);
}
if (h->llr2) {
free(h->llr2);
}
if (h->w) {
free(h->w);
}
if (h->syst0) {
free(h->syst0);
}
if (h->syst1) {
free(h->syst1);
}
if (h->parity0) {
free(h->parity0);
}
if (h->parity1) {
free(h->parity1);
}
if (h->alpha) {
free(h->alpha);
}
for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
srslte_tc_interl_free(&h->interleaver[i]);
}
bzero(h, sizeof(srslte_tdec_simd_inter_t));
}
/* Deinterleave for inter-frame parallelization */
void extract_input(srslte_tdec_simd_inter_t *h, int16_t *input, uint32_t cbidx, uint32_t long_cb)
{
for (int i=0;i<long_cb;i++) {
h->syst0[h->max_par_cb*i+cbidx] = input[3*i+0];
h->parity0[h->max_par_cb*i+cbidx] = input[3*i+1];
h->parity1[h->max_par_cb*i+cbidx] = input[3*i+2];
}
for (int i = long_cb; i < long_cb + 3; i++) {
h->syst0[h->max_par_cb*i+cbidx] = input[3*long_cb + 2*(i - long_cb)];
h->syst1[h->max_par_cb*i+cbidx] = input[3*long_cb + 2*(i - long_cb)];
h->parity0[h->max_par_cb*i+cbidx] = input[3*long_cb + 2*(i - long_cb) + 1];
h->parity0[h->max_par_cb*i+cbidx] = input[3*long_cb + 2*(i - long_cb) + 2];
}
}
void srslte_tdec_simd_inter_iteration(srslte_tdec_simd_inter_t * h, int16_t *input[SRSLTE_TDEC_NPAR], uint32_t nof_cb, uint32_t long_cb)
{
if (h->current_cbidx >= 0) {
uint16_t *inter = h->interleaver[h->current_cbidx].forward;
uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
// Prepare systematic and parity bits for MAP DEC #1
for (int i=0;i<nof_cb;i++) {
if (h->n_iter[i] == 0) {
extract_input(h, input[i], i, long_cb);
}
srslte_vec_sum_sss(h->syst0, h->w, h->syst0, long_cb*h->max_par_cb);
}
// Run MAP DEC #1
map_sse_inter_dec(h, h->syst0, h->parity0, h->llr1, long_cb);
// Prepare systematic and parity bits for MAP DEC #1
sse_inter_extract_syst1(h, inter, long_cb);
// Run MAP DEC #2
map_sse_inter_dec(h, h->syst1, h->parity1, h->llr2, long_cb);
// Update a-priori LLR from the last iteration
sse_inter_update_w(h, deinter, long_cb);
} else {
fprintf(stderr, "Error CB index not set (call srslte_tdec_simd_inter_reset() first\n");
}
}
int srslte_tdec_simd_inter_reset_cb(srslte_tdec_simd_inter_t * h, uint32_t cb_idx)
{
for (int i=0;i<h->current_long_cb;i++) {
h->w[h->max_par_cb*i+cb_idx] = 0;
}
return 0;
}
int srslte_tdec_simd_inter_reset(srslte_tdec_simd_inter_t * h, uint32_t long_cb)
{
if (long_cb > h->max_long_cb) {
fprintf(stderr, "TDEC was initialized for max_long_cb=%d\n",
h->max_long_cb);
return -1;
}
h->current_long_cb = long_cb;
h->current_cbidx = srslte_cbsegm_cbindex(long_cb);
if (h->current_cbidx < 0) {
fprintf(stderr, "Invalid CB length %d\n", long_cb);
return -1;
}
memset(h->w, 0, sizeof(int16_t) * long_cb * h->max_par_cb);
return 0;
}
void srslte_tdec_simd_inter_decision_cb(srslte_tdec_simd_inter_t * h, uint8_t *output, uint32_t cb_idx, uint32_t long_cb)
{
uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
uint32_t i;
for (i = 0; i < long_cb; i++) {
output[i] = (h->llr2[h->max_par_cb*deinter[i]+cb_idx] > 0) ? 1 : 0;
}
}
void srslte_tdec_simd_inter_decision(srslte_tdec_simd_inter_t * h, uint8_t *output[SRSLTE_TDEC_NPAR], uint32_t nof_cb, uint32_t long_cb)
{
for (int i=0;i<nof_cb;i++) {
srslte_tdec_simd_inter_decision_cb(h, output[i], i, long_cb);
}
}
void srslte_tdec_simd_inter_decision_byte_cb(srslte_tdec_simd_inter_t * h, uint8_t *output, uint32_t cb_idx, uint32_t long_cb)
{
uint32_t i;
uint8_t mask[8] = {0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1};
uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
#define indexOf_cb(idx, cb) (h->max_par_cb*(deinter[8*i+idx])+cb)
// long_cb is always byte aligned
for (i = 0; i < long_cb/8; i++) {
uint8_t out0 = h->llr2[indexOf_cb(0, cb_idx)]>0?mask[0]:0;
uint8_t out1 = h->llr2[indexOf_cb(1, cb_idx)]>0?mask[1]:0;
uint8_t out2 = h->llr2[indexOf_cb(2, cb_idx)]>0?mask[2]:0;
uint8_t out3 = h->llr2[indexOf_cb(3, cb_idx)]>0?mask[3]:0;
uint8_t out4 = h->llr2[indexOf_cb(4, cb_idx)]>0?mask[4]:0;
uint8_t out5 = h->llr2[indexOf_cb(5, cb_idx)]>0?mask[5]:0;
uint8_t out6 = h->llr2[indexOf_cb(6, cb_idx)]>0?mask[6]:0;
uint8_t out7 = h->llr2[indexOf_cb(7, cb_idx)]>0?mask[7]:0;
output[i] = out0 | out1 | out2 | out3 | out4 | out5 | out6 | out7;
}
}
void srslte_tdec_simd_inter_decision_byte(srslte_tdec_simd_inter_t * h, uint8_t *output[SRSLTE_TDEC_NPAR], uint32_t nof_cb, uint32_t long_cb)
{
for (int i=0;i<nof_cb;i++) {
srslte_tdec_simd_inter_decision_byte_cb(h, output[i], i, long_cb);
}
}
int srslte_tdec_simd_inter_run_all(srslte_tdec_simd_inter_t * h,
int16_t *input[SRSLTE_TDEC_NPAR], uint8_t *output[SRSLTE_TDEC_NPAR],
uint32_t nof_iterations, uint32_t nof_cb, uint32_t long_cb)
{
uint32_t iter = 0;
if (srslte_tdec_simd_inter_reset(h, long_cb)) {
return SRSLTE_ERROR;
}
do {
srslte_tdec_simd_inter_iteration(h, input, nof_cb, long_cb);
iter++;
} while (iter < nof_iterations);
srslte_tdec_simd_inter_decision_byte(h, output, nof_cb, long_cb);
return SRSLTE_SUCCESS;
}
#endif

View File

@ -31,7 +31,7 @@
#include <strings.h>
#include <math.h>
#include "srslte/phy/fec/turbodecoder_sse.h"
#include "srslte/phy/fec/turbodecoder_simd.h"
#include "srslte/phy/utils/vector.h"
#include <inttypes.h>
@ -52,16 +52,30 @@
#ifdef LV_HAVE_SSE
/*
static void print_128i(__m128i x) {
int16_t *s = (int16_t*) &x;
printf("[%d", s[0]);
for (int i=1;i<8;i++) {
printf(",%d", s[i]);
}
printf("]\n");
}
*/
//#define use_beta_transposed_max
#ifndef use_beta_transposed_max
/* Computes the horizontal MAX from 8 16-bit integers using the minpos_epu16 SSE4.1 instruction */
static inline int16_t hMax(__m128i buffer)
{
__m128i tmp1 = _mm_sub_epi8(_mm_set1_epi16(0x7FFF), buffer);
__m128i tmp1 = _mm_sub_epi16(_mm_set1_epi16(0x7FFF), buffer);
__m128i tmp3 = _mm_minpos_epu16(tmp1);
return (int16_t)(_mm_cvtsi128_si32(tmp3));
}
/* Computes beta values */
void map_gen_beta(map_gen_t * s, int16_t * output, uint32_t long_cb)
void map_sse_beta(map_gen_t * s, int16_t * output, uint32_t long_cb)
{
int k;
uint32_t end = long_cb + 3;
@ -126,15 +140,15 @@ void map_gen_beta(map_gen_t * s, int16_t * output, uint32_t long_cb)
alpha_k = _mm_load_si128(alphaPtr);\
alphaPtr--;\
bp = _mm_add_epi16(bp, alpha_k);\
bn = _mm_add_epi16(bn, alpha_k); output[k-d] = hMax(bn) - hMax(bp);
bn = _mm_add_epi16(bn, alpha_k);\
output[k-d] = hMax(bn)-hMax(bp);
/* The tail does not require to load alpha or produce outputs. Only update
* beta metrics accordingly */
for (k=end-1; k>=long_cb; k--) {
int16_t g0 = s->branch[2*k];
int16_t g1 = s->branch[2*k+1];
g = _mm_set_epi16(g1, g0, g0, g1, g1, g0, g0, g1);
BETA_STEP(g);
}
@ -143,6 +157,7 @@ void map_gen_beta(map_gen_t * s, int16_t * output, uint32_t long_cb)
for (; k >= 0; k-=8) {
gv = _mm_load_si128(gPtr);
gPtr--;
BETA_STEP_CNT(0,0);
BETA_STEP_CNT(1,1);
BETA_STEP_CNT(2,2);
@ -154,14 +169,17 @@ void map_gen_beta(map_gen_t * s, int16_t * output, uint32_t long_cb)
BETA_STEP_CNT(0,4);
BETA_STEP_CNT(1,5);
BETA_STEP_CNT(2,6);
BETA_STEP_CNT(3,7);
BETA_STEP_CNT(3,7);
norm = _mm_shuffle_epi8(beta_k, shuf_norm);
beta_k = _mm_sub_epi16(beta_k, norm);
}
}
#endif
/* Computes alpha metrics */
void map_gen_alpha(map_gen_t * s, uint32_t long_cb)
void map_sse_alpha(map_gen_t * s, uint32_t long_cb)
{
uint32_t k;
int16_t *alpha = s->alpha;
@ -250,9 +268,9 @@ void map_gen_alpha(map_gen_t * s, uint32_t long_cb)
}
/* Compute branch metrics (gamma) */
void map_gen_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t long_cb)
void map_sse_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity, uint32_t long_cb)
{
__m128i res10, res20, res11, res21, res1, res2;
__m128i res00, res10, res01, res11, res0, res1;
__m128i in, ap, pa, g1, g0;
__m128i *inPtr = (__m128i*) input;
@ -260,10 +278,10 @@ void map_gen_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity,
__m128i *paPtr = (__m128i*) parity;
__m128i *resPtr = (__m128i*) h->branch;
__m128i res10_mask = _mm_set_epi8(0xff,0xff,7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0);
__m128i res20_mask = _mm_set_epi8(0xff,0xff,15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8);
__m128i res11_mask = _mm_set_epi8(7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0,0xff,0xff);
__m128i res21_mask = _mm_set_epi8(15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8,0xff,0xff);
__m128i res00_mask = _mm_set_epi8(0xff,0xff,7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0);
__m128i res10_mask = _mm_set_epi8(0xff,0xff,15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8);
__m128i res01_mask = _mm_set_epi8(7,6,0xff,0xff,5,4,0xff,0xff,3,2,0xff,0xff,1,0,0xff,0xff);
__m128i res11_mask = _mm_set_epi8(15,14,0xff,0xff,13,12,0xff,0xff,11,10,0xff,0xff,9,8,0xff,0xff);
for (int i=0;i<long_cb/8;i++) {
in = _mm_load_si128(inPtr);
@ -283,17 +301,17 @@ void map_gen_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity,
g1 = _mm_srai_epi16(g1, 1);
g0 = _mm_srai_epi16(g0, 1);
res00 = _mm_shuffle_epi8(g0, res00_mask);
res10 = _mm_shuffle_epi8(g0, res10_mask);
res20 = _mm_shuffle_epi8(g0, res20_mask);
res01 = _mm_shuffle_epi8(g1, res01_mask);
res11 = _mm_shuffle_epi8(g1, res11_mask);
res21 = _mm_shuffle_epi8(g1, res21_mask);
res0 = _mm_or_si128(res00, res01);
res1 = _mm_or_si128(res10, res11);
res2 = _mm_or_si128(res20, res21);
_mm_store_si128(resPtr, res1);
_mm_store_si128(resPtr, res0);
resPtr++;
_mm_store_si128(resPtr, res2);
_mm_store_si128(resPtr, res1);
resPtr++;
}
@ -303,356 +321,177 @@ void map_gen_gamma(map_gen_t * h, int16_t *input, int16_t *app, int16_t *parity,
}
}
/* Inititalizes constituent decoder object */
int map_gen_init(map_gen_t * h, int max_long_cb)
{
bzero(h, sizeof(map_gen_t));
h->alpha = srslte_vec_malloc(sizeof(int16_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES);
if (!h->alpha) {
perror("srslte_vec_malloc");
return -1;
}
h->branch = srslte_vec_malloc(sizeof(int16_t) * (max_long_cb + SRSLTE_TCOD_TOTALTAIL + 1) * NUMSTATES);
if (!h->branch) {
perror("srslte_vec_malloc");
return -1;
}
h->max_long_cb = max_long_cb;
return 0;
}
void map_gen_free(map_gen_t * h)
{
if (h->alpha) {
free(h->alpha);
}
if (h->branch) {
free(h->branch);
}
bzero(h, sizeof(map_gen_t));
}
/* Runs one instance of a decoder */
void map_gen_dec(map_gen_t * h, int16_t * input, int16_t *app, int16_t * parity, int16_t * output,
uint32_t long_cb)
{
// Compute branch metrics
map_gen_gamma(h, input, app, parity, long_cb);
// Forward recursion
map_gen_alpha(h, long_cb);
// Backwards recursion + LLR computation
map_gen_beta(h, output, long_cb);
}
/* Initializes the turbo decoder object */
int srslte_tdec_sse_init(srslte_tdec_sse_t * h, uint32_t max_long_cb)
{
int ret = -1;
bzero(h, sizeof(srslte_tdec_sse_t));
uint32_t len = max_long_cb + SRSLTE_TCOD_TOTALTAIL;
h->max_long_cb = max_long_cb;
h->app1 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->app1) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->app2 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->app2) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->ext1 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->ext1) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->ext2 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->ext2) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->syst = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->syst) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->parity0 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->parity0) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
h->parity1 = srslte_vec_malloc(sizeof(int16_t) * len);
if (!h->parity1) {
perror("srslte_vec_malloc");
goto clean_and_exit;
}
if (map_gen_init(&h->dec, h->max_long_cb)) {
goto clean_and_exit;
}
for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
if (srslte_tc_interl_init(&h->interleaver[i], srslte_cbsegm_cbsize(i)) < 0) {
goto clean_and_exit;
}
srslte_tc_interl_LTE_gen(&h->interleaver[i], srslte_cbsegm_cbsize(i));
}
h->current_cbidx = -1;
ret = 0;
clean_and_exit:if (ret == -1) {
srslte_tdec_sse_free(h);
}
return ret;
}
void srslte_tdec_sse_free(srslte_tdec_sse_t * h)
{
if (h->app1) {
free(h->app1);
}
if (h->app2) {
free(h->app2);
}
if (h->ext1) {
free(h->ext1);
}
if (h->ext2) {
free(h->ext2);
}
if (h->syst) {
free(h->syst);
}
if (h->parity0) {
free(h->parity0);
}
if (h->parity1) {
free(h->parity1);
}
map_gen_free(&h->dec);
for (int i=0;i<SRSLTE_NOF_TC_CB_SIZES;i++) {
srslte_tc_interl_free(&h->interleaver[i]);
}
bzero(h, sizeof(srslte_tdec_sse_t));
}
/* Deinterleaves the 3 streams from the input (systematic and 2 parity bits) into
* 3 buffers ready to be used by compute_gamma()
/***********************
*
* This is an attempt to parallelize the horizontal max
* by doing a 8x8 tranpose of the vectors and computing max
* in cascade. However since we need to store 16 registers
* for the positive and negative values the performance is not very good
*/
void deinterleave_input(srslte_tdec_sse_t *h, int16_t *input, uint32_t long_cb) {
uint32_t i;
#ifdef use_beta_transposed_max
static inline __m128i transposed_max(__m128i a, __m128i b, __m128i c, __m128i d,
__m128i e, __m128i f, __m128i g, __m128i h)
{
// Transpose 8 vectors
__m128i t0 = _mm_unpacklo_epi16(a, b);
__m128i t1 = _mm_unpacklo_epi16(c, d);
__m128i t2 = _mm_unpacklo_epi16(e, f);
__m128i t3 = _mm_unpacklo_epi16(g, h);
__m128i t4 = _mm_unpackhi_epi16(a, b);
__m128i t5 = _mm_unpackhi_epi16(c, d);
__m128i t6 = _mm_unpackhi_epi16(e, f);
__m128i t7 = _mm_unpackhi_epi16(g, h);
__m128i s0 = _mm_unpacklo_epi32(t0, t1);
__m128i s1 = _mm_unpackhi_epi32(t0, t1);
__m128i s2 = _mm_unpacklo_epi32(t2, t3);
__m128i s3 = _mm_unpackhi_epi32(t2, t3);
__m128i s4 = _mm_unpacklo_epi32(t4, t5);
__m128i s5 = _mm_unpackhi_epi32(t4, t5);
__m128i s6 = _mm_unpacklo_epi32(t6, t7);
__m128i s7 = _mm_unpackhi_epi32(t6, t7);
__m128i x0 = _mm_unpacklo_epi64(s0, s2);
__m128i x1 = _mm_unpackhi_epi64(s0, s2);
__m128i x2 = _mm_unpacklo_epi64(s1, s3);
__m128i x3 = _mm_unpackhi_epi64(s1, s3);
__m128i x4 = _mm_unpacklo_epi64(s4, s6);
__m128i x5 = _mm_unpackhi_epi64(s4, s6);
__m128i x6 = _mm_unpacklo_epi64(s5, s7);
__m128i x7 = _mm_unpackhi_epi64(s5, s7);
// Cascade max on the transposed vector
__m128i res = _mm_max_epi16(x0,
_mm_max_epi16(x1,
_mm_max_epi16(x2,
_mm_max_epi16(x3,
_mm_max_epi16(x4,
_mm_max_epi16(x5,
_mm_max_epi16(x6,
x7)))))));
return res;
}
void map_sse_beta(map_gen_t * s, int16_t * output, uint32_t long_cb)
{
int k;
uint32_t end = long_cb + 3;
const __m128i *alphaPtr = (const __m128i*) s->alpha;
__m128i *inputPtr = (__m128i*) input;
__m128i in0, in1, in2;
__m128i s0, s1, s2, s;
__m128i p00, p01, p02, p0;
__m128i p10, p11, p12, p1;
__m128i beta_k = _mm_set_epi16(-INF, -INF, -INF, -INF, -INF, -INF, -INF, 0);
__m128i g, alpha_k;
__m128i bn, bn_0, bn_1, bn_2, bn_3, bn_4, bn_5, bn_6, bn_7;
__m128i bp, bp_0, bp_1, bp_2, bp_3, bp_4, bp_5, bp_6, bp_7;
__m128i *sysPtr = (__m128i*) h->syst;
__m128i *pa0Ptr = (__m128i*) h->parity0;
__m128i *pa1Ptr = (__m128i*) h->parity1;
// pick bits 0, 3, 6 from 1st word
__m128i s0_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,13,12,7,6,1,0);
// pick bits 1, 4, 7 from 2st word
__m128i s1_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,15,14,9,8,3,2,0xff,0xff,0xff,0xff,0xff,0xff);
// pick bits 2, 5 from 3rd word
__m128i s2_mask = _mm_set_epi8(11,10,5,4,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
/* Define the shuffle constant for the positive beta */
__m128i shuf_bp = _mm_set_epi8(
15, 14, // 7
7, 6, // 3
5, 4, // 2
13, 12, // 6
11, 10, // 5
3, 2, // 1
1, 0, // 0
9, 8 // 4
);
// pick bits 1, 4, 7 from 1st word
__m128i p00_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,15,14,9,8,3,2);
// pick bits 2, 5, from 2st word
__m128i p01_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,11,10,5,4,0xff,0xff,0xff,0xff,0xff,0xff);
// pick bits 0, 3, 6 from 3rd word
__m128i p02_mask = _mm_set_epi8(13,12,7,6,1,0,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
/* Define the shuffle constant for the negative beta */
__m128i shuf_bn = _mm_set_epi8(
7, 6, // 3
15, 14, // 7
13, 12, // 6
5, 4, // 2
3, 2, // 1
11, 10, // 5
9, 8, // 4
1, 0 // 0
);
alphaPtr += long_cb-1;
/* Define shuffle for branch costs */
__m128i shuf_g[4];
shuf_g[3] = _mm_set_epi8(3,2,1,0,1,0,3,2,3,2,1,0,1,0,3,2);
shuf_g[2] = _mm_set_epi8(7,6,5,4,5,4,7,6,7,6,5,4,5,4,7,6);
shuf_g[1] = _mm_set_epi8(11,10,9,8,9,8,11,10,11,10,9,8,9,8,11,10);
shuf_g[0] = _mm_set_epi8(15,14,13,12,13,12,15,14,15,14,13,12,13,12,15,14);
__m128i gv;
int16_t *b = &s->branch[2*long_cb-8];
__m128i *gPtr = (__m128i*) b;
/* Define shuffle for beta normalization */
__m128i shuf_norm = _mm_set_epi8(1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0);
// pick bits 2, 5 from 1st word
__m128i p10_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,11,10,5,4);
// pick bits 0, 3, 6, from 2st word
__m128i p11_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,13,12,7,6,1,0,0xff,0xff,0xff,0xff);
// pick bits 1, 4, 7 from 3rd word
__m128i p12_mask = _mm_set_epi8(15,14,9,8,3,2,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
// Split systematic and parity bits
for (i = 0; i < long_cb/8; i++) {
in0 = _mm_load_si128(inputPtr); inputPtr++;
in1 = _mm_load_si128(inputPtr); inputPtr++;
in2 = _mm_load_si128(inputPtr); inputPtr++;
/* This defines a beta computation step:
* Adds and substracts the branch metrics to the previous beta step,
* shuffles the states according to the trellis path and selects maximum state
*/
#define BETA_STEP(g) bp = _mm_add_epi16(beta_k, g);\
bn = _mm_sub_epi16(beta_k, g);\
bp = _mm_shuffle_epi8(bp, shuf_bp);\
bn = _mm_shuffle_epi8(bn, shuf_bn);\
beta_k = _mm_max_epi16(bp, bn);
/* Loads the alpha metrics from memory and adds them to the temporal bn and bp
* metrics.
*/
#define BETA_STEP_CNT(c,d) g = _mm_shuffle_epi8(gv, shuf_g[c]);\
BETA_STEP(g)\
alpha_k = _mm_load_si128(alphaPtr);\
alphaPtr--;\
bp_##d = _mm_add_epi16(bp, alpha_k);\
bn_##d = _mm_add_epi16(bn, alpha_k);\
/* The tail does not require to load alpha or produce outputs. Only update
* beta metrics accordingly */
for (k=end-1; k>=long_cb; k--) {
int16_t g0 = s->branch[2*k];
int16_t g1 = s->branch[2*k+1];
g = _mm_set_epi16(g1, g0, g0, g1, g1, g0, g0, g1);
BETA_STEP(g);
}
/* We inline 2 trelis steps for each normalization */
__m128i norm;
__m128i *outPtr = (__m128i*) &output[long_cb-8];
for (; k >= 0; k-=8) {
gv = _mm_load_si128(gPtr);
gPtr--;
/* Deinterleave Systematic bits */
s0 = _mm_shuffle_epi8(in0, s0_mask);
s1 = _mm_shuffle_epi8(in1, s1_mask);
s2 = _mm_shuffle_epi8(in2, s2_mask);
s = _mm_or_si128(s0, s1);
s = _mm_or_si128(s, s2);
_mm_store_si128(sysPtr, s);
sysPtr++;
/* Deinterleave parity 0 bits */
p00 = _mm_shuffle_epi8(in0, p00_mask);
p01 = _mm_shuffle_epi8(in1, p01_mask);
p02 = _mm_shuffle_epi8(in2, p02_mask);
p0 = _mm_or_si128(p00, p01);
p0 = _mm_or_si128(p0, p02);
BETA_STEP_CNT(0,0);
BETA_STEP_CNT(1,1);
BETA_STEP_CNT(2,2);
BETA_STEP_CNT(3,3);
norm = _mm_shuffle_epi8(beta_k, shuf_norm);
beta_k = _mm_sub_epi16(beta_k, norm);
gv = _mm_load_si128(gPtr);
gPtr--;
BETA_STEP_CNT(0,4);
BETA_STEP_CNT(1,5);
BETA_STEP_CNT(2,6);
BETA_STEP_CNT(3,7);
norm = _mm_shuffle_epi8(beta_k, shuf_norm);
beta_k = _mm_sub_epi16(beta_k, norm);
_mm_store_si128(pa0Ptr, p0);
pa0Ptr++;
/* Deinterleave parity 1 bits */
p10 = _mm_shuffle_epi8(in0, p10_mask);
p11 = _mm_shuffle_epi8(in1, p11_mask);
p12 = _mm_shuffle_epi8(in2, p12_mask);
p1 = _mm_or_si128(p10, p11);
p1 = _mm_or_si128(p1, p12);
_mm_store_si128(pa1Ptr, p1);
pa1Ptr++;
}
for (i = 0; i < 3; i++) {
h->syst[i+long_cb] = input[3*long_cb + 2*i];
h->parity0[i+long_cb] = input[3*long_cb + 2*i + 1];
}
for (i = 0; i < 3; i++) {
h->app2[i+long_cb] = input[3*long_cb + 6 + 2*i];
h->parity1[i+long_cb] = input[3*long_cb + 6 + 2*i + 1];
}
__m128i bn_transp = transposed_max(bn_7, bn_6, bn_5, bn_4, bn_3, bn_2, bn_1, bn_0);
__m128i bp_transp = transposed_max(bp_7, bp_6, bp_5, bp_4, bp_3, bp_2, bp_1, bp_0);
__m128i outval = _mm_sub_epi16(bp_transp,bn_transp);
_mm_store_si128(outPtr, outval);
outPtr--;
}
}
#endif
/* Runs 1 turbo decoder iteration */
void srslte_tdec_sse_iteration(srslte_tdec_sse_t * h, int16_t * input, uint32_t long_cb)
{
if (h->current_cbidx >= 0) {
uint16_t *inter = h->interleaver[h->current_cbidx].forward;
uint16_t *deinter = h->interleaver[h->current_cbidx].reverse;
if (h->n_iter == 0) {
deinterleave_input(h, input, long_cb);
}
// Add apriori information to decoder 1
if (h->n_iter > 0) {
srslte_vec_sub_sss(h->app1, h->ext1, h->app1, long_cb);
}
// Run MAP DEC #1
if (h->n_iter == 0) {
map_gen_dec(&h->dec, h->syst, NULL, h->parity0, h->ext1, long_cb);
} else {
map_gen_dec(&h->dec, h->syst, h->app1, h->parity0, h->ext1, long_cb);
}
// Convert aposteriori information into extrinsic information
if (h->n_iter > 0) {
srslte_vec_sub_sss(h->ext1, h->app1, h->ext1, long_cb);
}
// Interleave extrinsic output of DEC1 to form apriori info for decoder 2
srslte_vec_lut_sss(h->ext1, deinter, h->app2, long_cb);
// Run MAP DEC #2. 2nd decoder uses apriori information as systematic bits
map_gen_dec(&h->dec, h->app2, NULL, h->parity1, h->ext2, long_cb);
// Deinterleaved extrinsic bits become apriori info for decoder 1
srslte_vec_lut_sss(h->ext2, inter, h->app1, long_cb);
h->n_iter++;
} else {
fprintf(stderr, "Error CB index not set (call srslte_tdec_sse_reset() first\n");
}
}
/* Resets the decoder and sets the codeblock length */
int srslte_tdec_sse_reset(srslte_tdec_sse_t * h, uint32_t long_cb)
{
if (long_cb > h->max_long_cb) {
fprintf(stderr, "TDEC was initialized for max_long_cb=%d\n",
h->max_long_cb);
return -1;
}
h->n_iter = 0;
h->current_cbidx = srslte_cbsegm_cbindex(long_cb);
if (h->current_cbidx < 0) {
fprintf(stderr, "Invalid CB length %d\n", long_cb);
return -1;
}
return 0;
}
void srslte_tdec_sse_decision(srslte_tdec_sse_t * h, uint8_t *output, uint32_t long_cb)
{
__m128i zero = _mm_set1_epi16(0);
__m128i lsb_mask = _mm_set1_epi16(1);
__m128i *appPtr = (__m128i*) h->app1;
__m128i *outPtr = (__m128i*) output;
__m128i ap, out, out0, out1;
for (uint32_t i = 0; i < long_cb/16; i++) {
ap = _mm_load_si128(appPtr); appPtr++;
out0 = _mm_and_si128(_mm_cmpgt_epi16(ap, zero), lsb_mask);
ap = _mm_load_si128(appPtr); appPtr++;
out1 = _mm_and_si128(_mm_cmpgt_epi16(ap, zero), lsb_mask);
out = _mm_packs_epi16(out0, out1);
_mm_store_si128(outPtr, out);
outPtr++;
}
if (long_cb%16) {
for (int i=0;i<8;i++) {
output[long_cb-8+i] = h->app1[long_cb-8+i]>0?1:0;
}
}
}
void srslte_tdec_sse_decision_byte(srslte_tdec_sse_t * h, uint8_t *output, uint32_t long_cb)
{
uint8_t mask[8] = {0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1};
// long_cb is always byte aligned
for (uint32_t i = 0; i < long_cb/8; i++) {
uint8_t out0 = h->app1[8*i+0]>0?mask[0]:0;
uint8_t out1 = h->app1[8*i+1]>0?mask[1]:0;
uint8_t out2 = h->app1[8*i+2]>0?mask[2]:0;
uint8_t out3 = h->app1[8*i+3]>0?mask[3]:0;
uint8_t out4 = h->app1[8*i+4]>0?mask[4]:0;
uint8_t out5 = h->app1[8*i+5]>0?mask[5]:0;
uint8_t out6 = h->app1[8*i+6]>0?mask[6]:0;
uint8_t out7 = h->app1[8*i+7]>0?mask[7]:0;
output[i] = out0 | out1 | out2 | out3 | out4 | out5 | out6 | out7;
}
}
/* Runs nof_iterations iterations and decides the output bits */
int srslte_tdec_sse_run_all(srslte_tdec_sse_t * h, int16_t * input, uint8_t *output,
uint32_t nof_iterations, uint32_t long_cb)
{
if (srslte_tdec_sse_reset(h, long_cb)) {
return SRSLTE_ERROR;
}
do {
srslte_tdec_sse_iteration(h, input, long_cb);
} while (h->n_iter < nof_iterations);
srslte_tdec_sse_decision_byte(h, output, long_cb);
return SRSLTE_SUCCESS;
}
#endif

View File

@ -0,0 +1,198 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 Software Radio Systems Limited
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <math.h>
#include "srslte/phy/fec/turbodecoder_simd_inter.h"
#include "srslte/phy/utils/vector.h"
#define NCB 8
#define INF 10000
#ifdef LV_HAVE_SSE
#include <smmintrin.h>
void sse_inter_extract_syst1(srslte_tdec_simd_inter_t *h, uint16_t *inter, uint32_t long_cb)
{
__m128i *llr1Ptr = (__m128i*) h->llr1;
__m128i *wPtr = (__m128i*) h->w;
__m128i *syst1Ptr = (__m128i*) h->syst1;
for (int i = 0; i < long_cb; i++) {
__m128i llr1 = _mm_load_si128(&llr1Ptr[inter[i]]);
__m128i w = _mm_load_si128(&wPtr[inter[i]]);
_mm_store_si128(syst1Ptr++, _mm_sub_epi16(llr1, w));
}
}
void sse_inter_update_w(srslte_tdec_simd_inter_t *h, uint16_t *deinter, uint32_t long_cb)
{
__m128i *llr1Ptr = (__m128i*) h->llr1;
__m128i *llr2Ptr = (__m128i*) h->llr2;
__m128i *wPtr = (__m128i*) h->w;
__m128i *syst1Ptr = (__m128i*) h->syst1;
for (int i = 0; i < long_cb; i++) {
__m128i llr1 = _mm_load_si128(llr1Ptr++);
__m128i w = _mm_load_si128(wPtr++);
__m128i llr2 = _mm_load_si128(&llr2Ptr[deinter[i]]);
_mm_store_si128(syst1Ptr++, _mm_add_epi16(w, _mm_sub_epi16(llr2, llr1)));
}
}
/* Computes beta values */
void map_sse_inter_beta(srslte_tdec_simd_inter_t * s, int16_t *input, int16_t *parity, int16_t * output, uint32_t long_cb)
{
__m128i m_b[8], new[8], old[8], max1[8], max0[8];
__m128i x, y, xy;
__m128i m1, m0;
uint32_t end = long_cb + 3;
uint32_t i;
__m128i *inputPtr = (__m128i*) input;
__m128i *parityPtr = (__m128i*) parity;
__m128i *outputPtr = (__m128i*) output;
__m128i *alphaPtr = (__m128i*) s->alpha;
for (int k = end - 1; k >= 0; k--) {
x = _mm_load_si128(inputPtr++);
y = _mm_load_si128(parityPtr++);
xy = _mm_add_epi16(x,y);
m_b[0] = _mm_add_epi16(old[4], xy);
m_b[1] = old[4];
m_b[2] = _mm_add_epi16(old[5], y);
m_b[3] = _mm_add_epi16(old[5], x);
m_b[4] = _mm_add_epi16(old[6], x);
m_b[5] = _mm_add_epi16(old[6], y);
m_b[6] = old[7];
m_b[7] = _mm_add_epi16(old[7], xy);
new[0] = old[0];
new[1] = _mm_add_epi16(old[0], xy);
new[2] = _mm_add_epi16(old[1], x);
new[3] = _mm_add_epi16(old[1], y);
new[4] = _mm_add_epi16(old[2], y);
new[5] = _mm_add_epi16(old[2], x);
new[6] = _mm_add_epi16(old[3], xy);
new[7] = old[3];
for (i = 0; i < 8; i++) {
__m128i alpha = _mm_load_si128(alphaPtr++);
max0[i] = _mm_add_epi16(alpha, m_b[i]);
max1[i] = _mm_add_epi16(alpha, new[i]);
}
m1 = _mm_max_epi16(max1[0], max1[1]);
m0 = _mm_max_epi16(max0[0], max0[1]);
for (i = 2; i < 8; i++) {
m1 = _mm_max_epi16(m1, max1[i]);
m0 = _mm_max_epi16(m0, max0[i]);
}
for (i = 0; i < 8; i++) {
new[i] = _mm_max_epi16(m_b[i], new[i]);
old[i] = new[i];
}
__m128i out = _mm_sub_epi16(m1, m0);
_mm_store_si128(outputPtr++, out);
// normalize
if ((k%4)==0) {
for (int i=1;i<8;i++) {
_mm_sub_epi16(old[i], old[0]);
}
}
}
}
/* Computes alpha metrics */
void map_see_inter_alpha(srslte_tdec_simd_inter_t * s, int16_t *input, int16_t *parity, uint32_t long_cb)
{
__m128i m_b[8], new[8], old[8];
__m128i x, y, xy;
uint32_t k;
__m128i *inputPtr = (__m128i*) input;
__m128i *parityPtr = (__m128i*) parity;
__m128i *alphaPtr = (__m128i*) s->alpha;
old[0] = _mm_set1_epi16(0);
for (int i = 1; i < 8; i++) {
old[i] = _mm_set1_epi16(-INF);
}
for (k = 0; k < long_cb; k++) {
x = _mm_load_si128(inputPtr++);
y = _mm_load_si128(parityPtr++);
xy = _mm_add_epi16(x,y);
m_b[0] = old[0];
m_b[1] = _mm_add_epi16(old[3], y);
m_b[2] = _mm_add_epi16(old[4], y);
m_b[3] = old[7];
m_b[4] = old[1];
m_b[5] = _mm_add_epi16(old[2], y);
m_b[6] = _mm_add_epi16(old[5], y);
m_b[7] = old[6];
new[0] = _mm_add_epi16(old[1], xy);
new[1] = _mm_add_epi16(old[2], x);
new[2] = _mm_add_epi16(old[5], x);
new[3] = _mm_add_epi16(old[6], xy);
new[4] = _mm_add_epi16(old[0], xy);
new[5] = _mm_add_epi16(old[3], x);
new[6] = _mm_add_epi16(old[4], x);
new[7] = _mm_add_epi16(old[7], xy);
for (int i = 0; i < 8; i++) {
new[i] = _mm_max_epi16(m_b[i], new[i]);
old[i] = new[i];
_mm_store_si128(alphaPtr++, old[i]);
}
// normalize
if ((k%4)==0) {
for (int i=1;i<8;i++) {
_mm_sub_epi16(old[i], old[0]);
}
}
}
}
#endif

View File

@ -122,7 +122,7 @@ void free37_sse(void *o) {
#ifdef LV_HAVE_AVX
#ifdef LV_HAVE_AVX2
int decode37_avx2(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length) {
srslte_viterbi_t *q = o;
@ -333,7 +333,7 @@ int init37_neon(srslte_viterbi_t *q, int poly[3], uint32_t framebits, bool tail_
#endif
#ifdef LV_HAVE_AVX
#ifdef LV_HAVE_AVX2
int init37_avx2(srslte_viterbi_t *q, int poly[3], uint32_t framebits, bool tail_biting) {
q->K = 7;
q->R = 3;
@ -383,7 +383,7 @@ int srslte_viterbi_init(srslte_viterbi_t *q, srslte_viterbi_type_t type, int pol
switch (type) {
case SRSLTE_VITERBI_37:
#ifdef LV_HAVE_SSE
#ifdef LV_HAVE_AVX
#ifdef LV_HAVE_AVX2
return init37_avx2(q, poly, max_frame_length, tail_bitting);
#else
return init37_sse(q, poly, max_frame_length, tail_bitting);
@ -408,7 +408,7 @@ int srslte_viterbi_init_sse(srslte_viterbi_t *q, srslte_viterbi_type_t type, int
}
#endif
#ifdef LV_HAVE_AVX
#ifdef LV_HAVE_AVX2
int srslte_viterbi_init_avx2(srslte_viterbi_t *q, srslte_viterbi_type_t type, int poly[3], uint32_t max_frame_length, bool tail_bitting)
{
return init37_avx2(q, poly, max_frame_length, tail_bitting);

View File

@ -14,7 +14,7 @@
//#define DEBUG
#ifdef LV_HAVE_SSE
#ifdef LV_HAVE_AVX2
#include <emmintrin.h>
#include <tmmintrin.h>

View File

@ -52,7 +52,7 @@ void srslte_filesink_free(srslte_filesink_t *q) {
}
int srslte_filesink_write(srslte_filesink_t *q, void *buffer, int nsamples) {
int i;
int i = 0;
float *fbuf = (float*) buffer;
_Complex float *cbuf = (_Complex float*) buffer;
_Complex short *sbuf = (_Complex short*) buffer;

View File

@ -111,6 +111,10 @@ int srslte_pbch_cp(cf_t *input, cf_t *output, srslte_cell_t cell, bool put) {
* Returns the number of symbols written to slot1_data
*
* 36.211 10.3 section 6.6.4
*
* @param[in] pbch PBCH complex symbols to place in slot1_data
* @param[out] slot1_data Complex symbol buffer for slot1
* @param[in] cell Cell configuration
*/
int srslte_pbch_put(cf_t *pbch, cf_t *slot1_data, srslte_cell_t cell) {
return srslte_pbch_cp(pbch, slot1_data, cell, true);
@ -122,6 +126,10 @@ int srslte_pbch_put(cf_t *pbch, cf_t *slot1_data, srslte_cell_t cell) {
* Returns the number of symbols written to pbch
*
* 36.211 10.3 section 6.6.4
*
* @param[in] slot1_data Complex symbols for slot1
* @param[out] pbch Extracted complex PBCH symbols
* @param[in] cell Cell configuration
*/
int srslte_pbch_get(cf_t *slot1_data, cf_t *pbch, srslte_cell_t cell) {
return srslte_pbch_cp(slot1_data, pbch, cell, false);
@ -244,8 +252,12 @@ void srslte_pbch_free(srslte_pbch_t *q) {
}
/** Unpacks MIB from PBCH message.
* msg buffer must be 24 byte length at least
/**
* Unpacks MIB from PBCH message.
*
* @param[in] msg PBCH in an unpacked bit array of size 24
* @param[out] sfn System frame number
* @param[out] cell MIB information about PHICH and system bandwidth will be saved here
*/
void srslte_pbch_mib_unpack(uint8_t *msg, srslte_cell_t *cell, uint32_t *sfn) {
int phich_res;
@ -289,8 +301,12 @@ void srslte_pbch_mib_unpack(uint8_t *msg, srslte_cell_t *cell, uint32_t *sfn) {
}
}
/** Unpacks MIB from PBCH message.
* msg buffer must be 24 byte length at least
/**
* Packs MIB to PBCH message.
*
* @param[out] payload Output unpacked bit array of size 24
* @param[in] sfn System frame number
* @param[in] cell Cell configuration to be encoded in MIB
*/
void srslte_pbch_mib_pack(srslte_cell_t *cell, uint32_t sfn, uint8_t *payload) {
int bw, phich_res = 0;

View File

@ -450,11 +450,11 @@ int srslte_pdsch_decode_multi(srslte_pdsch_t *q,
if (SRSLTE_VERBOSE_ISDEBUG()) {
DEBUG("SAVED FILE subframe.dat: received subframe symbols\n",0);
srslte_vec_save_file("subframe.dat", sf_symbols, SRSLTE_SF_LEN_RE(q->cell.nof_prb, q->cell.cp)*sizeof(cf_t));
srslte_vec_save_file("subframe.dat", sf_symbols[0], SRSLTE_SF_LEN_RE(q->cell.nof_prb, q->cell.cp)*sizeof(cf_t));
DEBUG("SAVED FILE hest0.dat and hest1.dat: channel estimates for port 0 and port 1\n",0);
srslte_vec_save_file("hest0.dat", ce[0], SRSLTE_SF_LEN_RE(q->cell.nof_prb, q->cell.cp)*sizeof(cf_t));
srslte_vec_save_file("hest0.dat", ce[0][0], SRSLTE_SF_LEN_RE(q->cell.nof_prb, q->cell.cp)*sizeof(cf_t));
if (q->cell.nof_ports > 1) {
srslte_vec_save_file("hest1.dat", ce[1], SRSLTE_SF_LEN_RE(q->cell.nof_prb, q->cell.cp)*sizeof(cf_t));
srslte_vec_save_file("hest1.dat", ce[1][0], SRSLTE_SF_LEN_RE(q->cell.nof_prb, q->cell.cp)*sizeof(cf_t));
}
DEBUG("SAVED FILE pdsch_symbols.dat: symbols after equalization\n",0);
srslte_vec_save_file("pdsch_symbols.dat", q->d, cfg->nbits.nof_re*sizeof(cf_t));

View File

@ -295,6 +295,11 @@ uint32_t srslte_ra_dl_grant_nof_re(srslte_ra_dl_grant_t *grant, srslte_cell_t ce
return nof_re;
}
/** Compute PRB allocation for Downlink as defined in 7.1.6 of 36.213
* Decode dci->type?_alloc to grant
* This function only reads dci->type?_alloc and dci->alloc_type fields.
* This function only writes grant->prb_idx and grant->nof_prb.
*/
/** Compute PRB allocation for Downlink as defined in 7.1.6 of 36.213 */
int srslte_ra_dl_dci_to_grant_prb_allocation(srslte_ra_dl_dci_t *dci, srslte_ra_dl_grant_t *grant, uint32_t nof_prb) {
int i, j;
@ -427,7 +432,7 @@ int srslte_ra_dl_dci_to_grant_prb_allocation(srslte_ra_dl_dci_t *dci, srslte_ra_
return SRSLTE_SUCCESS;
}
static int dl_fill_ra_mcs(srslte_ra_mcs_t *mcs, uint32_t nprb) {
int dl_fill_ra_mcs(srslte_ra_mcs_t *mcs, uint32_t nprb) {
uint32_t i_tbs = 0;
int tbs = -1;
if (mcs->idx < 10) {
@ -461,7 +466,12 @@ static int dl_fill_ra_mcs(srslte_ra_mcs_t *mcs, uint32_t nprb) {
return tbs;
}
/* Modulation order and transport block size determination 7.1.7 in 36.213 */
/* Modulation order and transport block size determination 7.1.7 in 36.213
* This looks at DCI type, type of RNTI and reads fields dci->type?_alloc, dci->mcs_idx,
* dci->dci_is_1a and dci->dci_is_1c
* Reads global variable last_dl_tbs if mcs>=29
* Writes global variable last_dl_tbs if mcs<29
* */
static int dl_dci_to_grant_mcs(srslte_ra_dl_dci_t *dci, srslte_ra_dl_grant_t *grant, bool crc_is_crnti) {
uint32_t n_prb=0;
int tbs = -1;

View File

@ -89,7 +89,7 @@ int regs_pdcch_init(srslte_regs_t *h) {
bzero(&h->pdcch, sizeof(srslte_regs_ch_t));
for (cfi=0;cfi<3;cfi++) {
if (h->cell.nof_prb < 10) {
if (h->cell.nof_prb <= 10) {
nof_ctrl_symbols = cfi+2;
} else {
nof_ctrl_symbols = cfi+1;
@ -673,7 +673,7 @@ void srslte_regs_free(srslte_regs_t *h) {
int srslte_regs_set_cfi(srslte_regs_t *h, uint32_t cfi) {
if (cfi > 0 && cfi <= 3) {
if (h->phich_len == SRSLTE_PHICH_EXT &&
((h->cell.nof_prb < 10 && cfi < 2) || (h->cell.nof_prb >= 10 && cfi < 3))) {
((h->cell.nof_prb <= 10 && cfi < 2) || (h->cell.nof_prb >= 10 && cfi < 3))) {
fprintf(stderr, "PHICH length is extended. The number of control symbols should be at least 3.\n");
return SRSLTE_ERROR_INVALID_INPUTS;
} else {
@ -705,7 +705,7 @@ int srslte_regs_init(srslte_regs_t *h, srslte_cell_t cell) {
bzero(h, sizeof(srslte_regs_t));
ret = SRSLTE_ERROR;
max_ctrl_symbols = cell.nof_prb<10?4:3;
max_ctrl_symbols = cell.nof_prb<=10?4:3;
vo = cell.id % 3;
h->cell = cell;
h->max_ctrl_symbols = max_ctrl_symbols;

View File

@ -311,102 +311,91 @@ static int encode_tb(srslte_sch_t *q,
return encode_tb_off(q, soft_buffer, cb_segm, Qm, rv, nof_e_bits, data, e_bits, 0);
}
/* Decode a transport block according to 36.212 5.3.2
*
*/
static int decode_tb(srslte_sch_t *q,
bool decode_tb_cb(srslte_sch_t *q,
srslte_softbuffer_rx_t *softbuffer, srslte_cbsegm_t *cb_segm,
uint32_t Qm, uint32_t rv, uint32_t nof_e_bits,
int16_t *e_bits, uint8_t *data)
int16_t *e_bits, uint8_t *data,
uint32_t cb_size_group)
{
uint8_t parity[3] = {0, 0, 0};
uint32_t par_rx, par_tx;
uint32_t i;
uint32_t cb_len, rp, wp, rlen, n_e;
bool cb_map[SRSLTE_MAX_CODEBLOCKS];
uint32_t cb_idx[SRSLTE_TDEC_NPAR];
int16_t *decoder_input[SRSLTE_TDEC_NPAR];
if (q != NULL &&
data != NULL &&
softbuffer != NULL &&
e_bits != NULL &&
cb_segm != NULL)
{
uint32_t nof_cb = cb_size_group?cb_segm->C2:cb_segm->C1;
uint32_t first_cb = cb_size_group?cb_segm->C1:0;
uint32_t cb_len = cb_size_group?cb_segm->K2:cb_segm->K1;
uint32_t cb_len_idx = cb_size_group?cb_segm->K2_idx:cb_segm->K1_idx;
if (cb_segm->tbs == 0 || cb_segm->C == 0) {
return SRSLTE_SUCCESS;
}
uint32_t rlen = cb_segm->C==1?cb_len:(cb_len-24);
uint32_t Gp = nof_e_bits / Qm;
uint32_t gamma = cb_segm->C>0?Gp%cb_segm->C:Gp;
uint32_t n_e = Qm * (Gp/cb_segm->C);
if (nof_cb > SRSLTE_MAX_CODEBLOCKS) {
fprintf(stderr, "Error SRSLTE_MAX_CODEBLOCKS=%d\n", SRSLTE_MAX_CODEBLOCKS);
return false;
}
for (int i=0;i<SRSLTE_TDEC_NPAR;i++) {
cb_idx[i] = i+first_cb;
decoder_input[i] = false;
}
for (int i=0;i<nof_cb;i++) {
cb_map[i] = false;
}
rp = 0;
rp = 0;
wp = 0;
uint32_t Gp = nof_e_bits / Qm;
uint32_t gamma=Gp;
if (cb_segm->F) {
fprintf(stderr, "Error filler bits are not supported. Use standard TBS\n");
return SRSLTE_ERROR;
}
if (cb_segm->C > softbuffer->max_cb) {
fprintf(stderr, "Error number of CB (%d) exceeds soft buffer size (%d CBs)\n", cb_segm->C, softbuffer->max_cb);
return -1;
}
if (cb_segm->C>0) {
gamma = Gp%cb_segm->C;
}
bool early_stop = true;
for (i = 0; i < cb_segm->C && early_stop; i++) {
/* Get read/write lengths */
uint32_t cblen_idx;
if (i < cb_segm->C2) {
cb_len = cb_segm->K2;
cblen_idx = cb_segm->K2_idx;
} else {
cb_len = cb_segm->K1;
cblen_idx = cb_segm->K1_idx;
}
srslte_tdec_reset(&q->decoder, cb_len);
uint32_t remaining_cb = nof_cb;
while(remaining_cb>0) {
// Unratematch the codeblocks left to decode
for (int i=0;i<SRSLTE_TDEC_NPAR;i++) {
if (cb_segm->C == 1) {
rlen = cb_len;
} else {
rlen = cb_len - 24;
if (!decoder_input[i] && remaining_cb > 0) {
// Find an unprocessed CB
cb_idx[i]=first_cb;
while(cb_idx[i]<first_cb+nof_cb-1 && cb_map[cb_idx[i]]) {
cb_idx[i]++;
}
if (cb_map[cb_idx[i]] == false) {
cb_map[cb_idx[i]] = true;
uint32_t rp = cb_idx[i]*n_e;
uint32_t n_e2 = n_e;
if (cb_idx[i] > cb_segm->C - gamma) {
n_e2 = n_e+Qm;
rp = (cb_segm->C - gamma)*n_e + (cb_idx[i]-(cb_segm->C - gamma))*n_e2;
}
INFO("CB %d: rp=%d, n_e=%d, i=%d\n", cb_idx[i], rp, n_e2, i);
if (srslte_rm_turbo_rx_lut(&e_bits[rp], softbuffer->buffer_f[cb_idx[i]], n_e2, cb_len_idx, rv)) {
fprintf(stderr, "Error in rate matching\n");
return SRSLTE_ERROR;
}
decoder_input[i] = softbuffer->buffer_f[cb_idx[i]];
}
}
}
// Run 1 iteration for up to TDEC_NPAR codeblocks
srslte_tdec_iteration_par(&q->decoder, decoder_input, cb_len);
if (i <= cb_segm->C - gamma - 1) {
n_e = Qm * (Gp/cb_segm->C);
} else {
n_e = Qm * ((uint32_t) ceilf((float) Gp/cb_segm->C));
}
q->nof_iterations = srslte_tdec_get_nof_iterations_cb(&q->decoder, 0);
// Decide output bits and compute CRC
for (int i=0;i<SRSLTE_TDEC_NPAR;i++) {
if (decoder_input[i]) {
srslte_tdec_decision_byte_par_cb(&q->decoder, q->cb_in, i, cb_len);
/* Rate Unmatching */
if (srslte_rm_turbo_rx_lut(&e_bits[rp], softbuffer->buffer_f[i], n_e, cblen_idx, rv)) {
fprintf(stderr, "Error in rate matching\n");
return SRSLTE_ERROR;
}
if (SRSLTE_VERBOSE_ISDEBUG()) {
char tmpstr[64];
snprintf(tmpstr,64,"rmout_%d.dat",i);
DEBUG("SAVED FILE %s: Encoded turbo code block %d\n", tmpstr, i);
srslte_vec_save_file(tmpstr, softbuffer->buffer_f[i], (3*cb_len+12)*sizeof(int16_t));
}
/* Turbo Decoding with CRC-based early stopping */
q->nof_iterations = 0;
uint32_t len_crc;
srslte_crc_t *crc_ptr;
early_stop = false;
srslte_tdec_reset(&q->decoder, cb_len);
do {
srslte_tdec_iteration(&q->decoder, softbuffer->buffer_f[i], cb_len);
q->nof_iterations++;
uint32_t len_crc;
srslte_crc_t *crc_ptr;
if (cb_segm->C > 1) {
len_crc = cb_len;
@ -416,65 +405,110 @@ static int decode_tb(srslte_sch_t *q,
crc_ptr = &q->crc_tb;
}
srslte_tdec_decision_byte(&q->decoder, q->cb_in, cb_len);
/* Check Codeblock CRC and stop early if correct */
// CRC is OK
if (!srslte_crc_checksum_byte(crc_ptr, q->cb_in, len_crc)) {
early_stop = true;
memcpy(&data[(cb_idx[i]*rlen)/8], q->cb_in, rlen/8 * sizeof(uint8_t));
// Reset number of iterations for that CB in the decoder
srslte_tdec_reset_cb(&q->decoder, i);
remaining_cb--;
decoder_input[i] = NULL;
cb_idx[i] = 0;
// CRC is error and exceeded maximum iterations for this CB.
// Early stop the whole transport block.
} else if (srslte_tdec_get_nof_iterations_cb(&q->decoder, i) >= q->max_iterations) {
INFO("CB %d: Error. CB is erroneous. remaining_cb=%d, i=%d, first_cb=%d, nof_cb=%d\n",
cb_idx[i], remaining_cb, i, first_cb, nof_cb);
return false;
}
} while (q->nof_iterations < q->max_iterations && !early_stop);
q->average_nof_iterations = SRSLTE_VEC_EMA((float) q->nof_iterations, q->average_nof_iterations, 0.2);
INFO("CB#%d: cb_len: %d, rlen: %d, wp: %d, rp: %d, E: %d, n_iters=%d\n", i,
cb_len, rlen, wp, rp, n_e, q->nof_iterations);
// If CB CRC is not correct, early_stop will be false and wont continue with rest of CBs
/* Copy data to another buffer, removing the Codeblock CRC */
if (i < cb_segm->C - 1) {
memcpy(&data[wp/8], q->cb_in, rlen/8 * sizeof(uint8_t));
} else {
/* Append Transport Block parity bits to the last CB */
memcpy(&data[wp/8], q->cb_in, (rlen - 24)/8 * sizeof(uint8_t));
memcpy(parity, &q->cb_in[(rlen - 24)/8], 3 * sizeof(uint8_t));
}
if (SRSLTE_VERBOSE_ISDEBUG()) {
early_stop = true;
}
/* Set read/write pointers */
wp += rlen;
rp += n_e;
}
}
return true;
}
/**
* Decode a transport block according to 36.212 5.3.2
*
* @param[in] q
* @param[inout] softbuffer Initialized softbuffer
* @param[in] cb_segm Code block segmentation parameters
* @param[in] e_bits Input transport block
* @param[in] Qm Modulation type
* @param[in] rv Redundancy Version. Indicates which part of FEC bits is in input buffer
* @param[out] softbuffer Initialized output softbuffer
* @param[out] data Decoded transport block
* @return negative if error in parameters or CRC error in decoding
*/
static int decode_tb(srslte_sch_t *q,
srslte_softbuffer_rx_t *softbuffer, srslte_cbsegm_t *cb_segm,
uint32_t Qm, uint32_t rv, uint32_t nof_e_bits,
int16_t *e_bits, uint8_t *data)
{
if (q != NULL &&
data != NULL &&
softbuffer != NULL &&
e_bits != NULL &&
cb_segm != NULL)
{
if (cb_segm->tbs == 0 || cb_segm->C == 0) {
return SRSLTE_SUCCESS;
}
if (!early_stop) {
INFO("CB %d failed. TB is erroneous.\n",i-1);
return SRSLTE_ERROR;
} else {
INFO("END CB#%d: wp: %d, rp: %d\n", i, wp, rp);
if (cb_segm->F) {
fprintf(stderr, "Error filler bits are not supported. Use standard TBS\n");
return SRSLTE_ERROR;
}
if (cb_segm->C > softbuffer->max_cb) {
fprintf(stderr, "Error number of CB (%d) exceeds soft buffer size (%d CBs)\n", cb_segm->C, softbuffer->max_cb);
return SRSLTE_ERROR;
}
bool crc_ok = true;
uint32_t nof_cb_groups = cb_segm->C2>0?2:1;
data[cb_segm->tbs/8+0] = 0;
data[cb_segm->tbs/8+1] = 0;
data[cb_segm->tbs/8+2] = 0;
// Process Codeblocks in groups of equal CB size to parallelize according to SRSLTE_TDEC_NPAR
for (uint32_t i=0;i<nof_cb_groups && crc_ok;i++) {
crc_ok = decode_tb_cb(q, softbuffer, cb_segm, Qm, rv, nof_e_bits, e_bits, data, i);
}
if (crc_ok) {
uint32_t par_rx = 0, par_tx = 0;
// Compute transport block CRC
par_rx = srslte_crc_checksum_byte(&q->crc_tb, data, cb_segm->tbs);
// check parity bits
par_tx = ((uint32_t) parity[0])<<16 | ((uint32_t) parity[1])<<8 | ((uint32_t) parity[2]);
par_tx = ((uint32_t) data[cb_segm->tbs/8+0])<<16 |
((uint32_t) data[cb_segm->tbs/8+1])<<8 |
((uint32_t) data[cb_segm->tbs/8+2]);
if (!par_rx) {
INFO("Warning: Received all-zero transport block\n\n", 0);
INFO("Warning: Received all-zero transport block\n\n",0);
}
if (par_rx == par_tx) {
INFO("TB decoded OK\n",i);
INFO("TB decoded OK\n",0);
return SRSLTE_SUCCESS;
} else {
INFO("Error in TB parity: par_tx=0x%x, par_rx=0x%x\n", par_tx, par_rx);
return SRSLTE_ERROR;
}
}
} else {
return SRSLTE_ERROR;
}
} else {
return SRSLTE_ERROR_INVALID_INPUTS;
}
@ -489,6 +523,16 @@ int srslte_dlsch_decode(srslte_sch_t *q, srslte_pdsch_cfg_t *cfg, srslte_softbuf
e_bits, data);
}
/**
* Encode transport block. Segments into code blocks, adds channel coding, and does rate matching.
*
* @param[in] q Initialized
* @param[in] cfg Encoding parameters
* @param[inout] softbuffer Initialized softbuffer
* @param[in] data Byte array of data. Size is implicit in cfg->cb_segm
* @param e_bits
* @return Error code
*/
int srslte_dlsch_encode(srslte_sch_t *q, srslte_pdsch_cfg_t *cfg, srslte_softbuffer_tx_t *softbuffer,
uint8_t *data, uint8_t *e_bits)
{

View File

@ -171,7 +171,7 @@ int main(int argc, char **argv) {
}
}
data = srslte_vec_malloc(sizeof(uint8_t) * grant.mcs.tbs/8);
data = srslte_vec_malloc(sizeof(uint8_t) * (grant.mcs.tbs/8)+24);
if (!data) {
perror("srslte_vec_malloc");
goto quit;

View File

@ -188,7 +188,7 @@ int main(int argc, char **argv) {
exit(-1);
}
data = srslte_vec_malloc(sizeof(uint8_t) * cfg.grant.mcs.tbs);
data = srslte_vec_malloc(sizeof(uint8_t) * (cfg.grant.mcs.tbs+24));
if (!data) {
perror("malloc");
exit(-1);
@ -202,10 +202,12 @@ int main(int argc, char **argv) {
fprintf(stderr, "Error initiating soft buffer\n");
goto quit;
}
srslte_softbuffer_tx_reset(&softbuffer_tx);
if (srslte_softbuffer_rx_init(&softbuffer_rx, 100)) {
fprintf(stderr, "Error initiating soft buffer\n");
goto quit;
}
srslte_softbuffer_rx_reset(&softbuffer_rx);
uint32_t ntrials = 100;

View File

@ -209,6 +209,15 @@ bitarray_copy(const unsigned char *src_org, int src_offset, int src_len,
}
}
/**
* Copy bits from src to dst, with offsets and length in bits
*
* @param[out] dst Output array
* @param[in] src Input array
* @param dst_offset Output array write offset in bits
* @param src_offset Input array read offset in bits
* @param nof_bits Number of bits to copy
*/
void srslte_bit_copy(uint8_t *dst, uint32_t dst_offset, uint8_t *src, uint32_t src_offset, uint32_t nof_bits)
{
static const uint8_t mask_dst[] =
@ -247,6 +256,13 @@ void srslte_bit_unpack_l(uint64_t value, uint8_t **bits, int nof_bits)
*bits += nof_bits;
}
/**
* Unpacks nof_bits from LSBs of value in MSB order to *bits. Advances pointer past unpacked bits.
*
* @param[in] value nof_bits lowest order bits will be unpacked in MSB order
* @param[in] nof_bits Number of bits to unpack
* @param[out] bits Points to buffer pointer. The buffer pointer will be advanced by nof_bits
*/
void srslte_bit_unpack(uint32_t value, uint8_t **bits, int nof_bits)
{
int i;

View File

@ -102,13 +102,13 @@ void srslte_vec_sub_fff(float *x, float *y, float *z, uint32_t len) {
z[i] = x[i]-y[i];
}
#else
srslte_vec_sub_fff_simd(x, y, z, len);
srslte_vec_sub_fff_sse(x, y, z, len);
#endif
}
void srslte_vec_sub_sss(short *x, short *y, short *z, uint32_t len) {
#ifdef LV_HAVE_AVX
srslte_vec_sub_sss_avx(x, y, z, len);
#ifdef LV_HAVE_AVX2
srslte_vec_sub_sss_avx2(x, y, z, len);
#else
#ifdef LV_HAVE_SSE
srslte_vec_sub_sss_sse(x, y, z, len);
@ -134,13 +134,13 @@ void srslte_vec_sum_fff(float *x, float *y, float *z, uint32_t len) {
z[i] = x[i]+y[i];
}
#else
srslte_vec_sum_fff_simd(x, y, z, len);
srslte_vec_sum_fff_sse(x, y, z, len);
#endif
}
void srslte_vec_sum_sss(short *x, short *y, short *z, uint32_t len) {
#ifdef LV_HAVE_AVX
srslte_vec_sum_sss_avx(x, y, z, len);
#ifdef LV_HAVE_AVX2
srslte_vec_sum_sss_avx2(x, y, z, len);
#else
#ifdef LV_HAVE_SSE
srslte_vec_sum_sss_sse(x, y, z, len);
@ -199,7 +199,7 @@ void srslte_vec_sc_prod_fff(float *x, float h, float *z, uint32_t len) {
z[i] = x[i]*h;
}
#else
srslte_vec_sc_prod_fff_simd(x, h, z, len);
srslte_vec_sc_prod_fff_sse(x, h, z, len);
#endif
}
@ -211,8 +211,8 @@ void srslte_vec_sc_prod_sfs(short *x, float h, short *z, uint32_t len) {
}
void srslte_vec_sc_div2_sss(short *x, int n_rightshift, short *z, uint32_t len) {
#ifdef LV_HAVE_AVX
srslte_vec_sc_div2_sss_avx(x, n_rightshift, z, len);
#ifdef LV_HAVE_AVX2
srslte_vec_sc_div2_sss_avx2(x, n_rightshift, z, len);
#else
#ifdef LV_HAVE_SSE
srslte_vec_sc_div2_sss_sse(x, n_rightshift, z, len);
@ -258,7 +258,7 @@ void srslte_vec_sc_prod_ccc(cf_t *x, cf_t h, cf_t *z, uint32_t len) {
z[i] = x[i]*h;
}
#else
srslte_vec_sc_prod_ccc_simd(x,h,z,len);
srslte_vec_sc_prod_ccc_sse(x,h,z,len);
#endif
}
@ -335,7 +335,7 @@ void srslte_vec_deinterleave_real_cf(cf_t *x, float *real, uint32_t len) {
}
}
/* Note: We align memory to 32 bytes (for AVX compatibility)
/* Note: We align memory to 32 bytes (for AVX2 compatibility)
* because in some cases volk can incorrectly detect the architecture.
* This could be inefficient for SSE or non-SIMD platforms but shouldn't
* be a huge problem.
@ -354,7 +354,7 @@ void *srslte_vec_realloc(void *ptr, uint32_t old_size, uint32_t new_size) {
return realloc(ptr, new_size);
#else
void *new_ptr;
if (posix_memalign(&new_ptr,64,new_size)) {
if (posix_memalign(&new_ptr,256,new_size)) {
return NULL;
} else {
memcpy(new_ptr, ptr, old_size);
@ -501,8 +501,8 @@ void srslte_vec_prod_fff(float *x, float *y, float *z, uint32_t len) {
// Scrambling Short
void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len) {
#ifdef LV_HAVE_AVX
srslte_vec_prod_sss_avx(x,y,z,len);
#ifdef LV_HAVE_AVX2
srslte_vec_prod_sss_avx2(x,y,z,len);
#else
#ifdef LV_HAVE_SSE
srslte_vec_prod_sss_sse(x,y,z,len);
@ -523,7 +523,7 @@ void srslte_vec_prod_ccc(cf_t *x,cf_t *y, cf_t *z, uint32_t len) {
z[i] = x[i]*y[i];
}
#else
srslte_vec_prod_ccc_simd(x,y,z,len);
srslte_vec_prod_ccc_sse(x,y,z,len);
#endif
}
@ -535,7 +535,7 @@ void srslte_vec_prod_conj_ccc(cf_t *x,cf_t *y, cf_t *z, uint32_t len) {
z[i] = x[i]*conjf(y[i]);
}
#else
srslte_vec_prod_conj_ccc_simd(x,y,z,len);
srslte_vec_prod_conj_ccc_sse(x,y,z,len);
#endif
}
@ -588,7 +588,7 @@ cf_t srslte_vec_dot_prod_ccc(cf_t *x, cf_t *y, uint32_t len) {
}
return res;
#else
return srslte_vec_dot_prod_ccc_simd(x, y, len);
return srslte_vec_dot_prod_ccc_sse(x, y, len);
#endif
}
@ -612,7 +612,7 @@ cf_t srslte_vec_dot_prod_conj_ccc(cf_t *x, cf_t *y, uint32_t len) {
}
return res;
#else
return srslte_vec_dot_prod_conj_ccc_simd(x, y, len);
return srslte_vec_dot_prod_conj_ccc_sse(x, y, len);
#endif
}
@ -628,8 +628,8 @@ float srslte_vec_dot_prod_fff(float *x, float *y, uint32_t len) {
}
int32_t srslte_vec_dot_prod_sss(int16_t *x, int16_t *y, uint32_t len) {
#ifdef LV_HAVE_AVX
return srslte_vec_dot_prod_sss_avx(x, y, len);
#ifdef LV_HAVE_AVX2
return srslte_vec_dot_prod_sss_avx2(x, y, len);
#else
#ifdef LV_HAVE_SSE
return srslte_vec_dot_prod_sss_sse(x, y, len);
@ -664,7 +664,7 @@ void srslte_vec_abs_square_cf(cf_t *x, float *abs_square, uint32_t len) {
abs_square[i] = crealf(x[i])*crealf(x[i])+cimagf(x[i])*cimagf(x[i]);
}
#else
srslte_vec_abs_square_cf_simd(x,abs_square,len);
srslte_vec_abs_square_cf_sse(x,abs_square,len);
#endif
}
@ -677,11 +677,17 @@ void srslte_vec_arg_cf(cf_t *x, float *arg, uint32_t len) {
}
uint32_t srslte_vec_max_fi(float *x, uint32_t len) {
#ifdef HAVE_VOLK_MAX_FUNCTION
uint16_t target=0;
// This is to solve an issue with incorrect type of 1st parameter in version 1.2 of volk
#ifdef HAVE_VOLK_MAX_FUNCTION_32
uint32_t target=0;
volk_32f_index_max_32u(&target,x,len);
return target;
#else
#ifdef HAVE_VOLK_MAX_FUNCTION_16
uint32_t target=0;
volk_32f_index_max_16u(&target,x,len);
return target;
#else
uint32_t i;
float m=-FLT_MAX;
@ -694,6 +700,7 @@ uint32_t srslte_vec_max_fi(float *x, uint32_t len) {
}
return p;
#endif
#endif
}
int16_t srslte_vec_max_star_si(int16_t *x, uint32_t len) {
@ -732,11 +739,15 @@ void srslte_vec_max_fff(float *x, float *y, float *z, uint32_t len) {
// CP autocorr
uint32_t srslte_vec_max_abs_ci(cf_t *x, uint32_t len) {
#ifdef HAVE_VOLK_MAX_ABS_FUNCTION
uint16_t target=0;
#ifdef HAVE_VOLK_MAX_ABS_FUNCTION_32
uint32_t target=0;
volk_32fc_index_max_32u(&target,x,len);
return target;
#else
#ifdef HAVE_VOLK_MAX_ABS_FUNCTION_16
uint32_t target=0;
volk_32fc_index_max_16u(&target,x,len);
return target;
#else
uint32_t i;
float m=-FLT_MAX;
@ -751,6 +762,7 @@ uint32_t srslte_vec_max_abs_ci(cf_t *x, uint32_t len) {
}
return p;
#endif
#endif
}
void srslte_vec_quant_fuc(float *in, uint8_t *out, float gain, float offset, float clip, uint32_t len) {

View File

@ -87,10 +87,10 @@ int srslte_vec_dot_prod_sss_sse(short *x, short *y, uint32_t len)
}
int srslte_vec_dot_prod_sss_avx(short *x, short *y, uint32_t len)
int srslte_vec_dot_prod_sss_avx2(short *x, short *y, uint32_t len)
{
int result = 0;
#ifdef LV_HAVE_AVX
#ifdef LV_HAVE_AVX2
unsigned int number = 0;
const unsigned int points = len / 16;
@ -110,7 +110,7 @@ int srslte_vec_dot_prod_sss_avx(short *x, short *y, uint32_t len)
yPtr ++;
}
short dotProdVector[16];
__attribute__ ((aligned (256))) short dotProdVector[16];
_mm256_store_si256((__m256i*) dotProdVector, dotProdVal);
for (int i=0;i<16;i++) {
result += dotProdVector[i];
@ -160,9 +160,9 @@ void srslte_vec_sum_sss_sse(short *x, short *y, short *z, uint32_t len)
}
void srslte_vec_sum_sss_avx(short *x, short *y, short *z, uint32_t len)
void srslte_vec_sum_sss_avx2(short *x, short *y, short *z, uint32_t len)
{
#ifdef LV_HAVE_SSE
#ifdef LV_HAVE_AVX2
unsigned int number = 0;
const unsigned int points = len / 16;
@ -225,9 +225,9 @@ void srslte_vec_sub_sss_sse(short *x, short *y, short *z, uint32_t len)
#endif
}
void srslte_vec_sub_sss_avx(short *x, short *y, short *z, uint32_t len)
void srslte_vec_sub_sss_avx2(short *x, short *y, short *z, uint32_t len)
{
#ifdef LV_HAVE_AVX
#ifdef LV_HAVE_AVX2
unsigned int number = 0;
const unsigned int points = len / 16;
@ -292,9 +292,9 @@ void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len)
#endif
}
void srslte_vec_prod_sss_avx(short *x, short *y, short *z, uint32_t len)
void srslte_vec_prod_sss_avx2(short *x, short *y, short *z, uint32_t len)
{
#ifdef LV_HAVE_SSE
#ifdef LV_HAVE_AVX2
unsigned int number = 0;
const unsigned int points = len / 16;
@ -359,9 +359,9 @@ void srslte_vec_sc_div2_sss_sse(short *x, int k, short *z, uint32_t len)
#endif
}
void srslte_vec_sc_div2_sss_avx(short *x, int k, short *z, uint32_t len)
void srslte_vec_sc_div2_sss_avx2(short *x, int k, short *z, uint32_t len)
{
#ifdef LV_HAVE_AVX
#ifdef LV_HAVE_AVX2
unsigned int number = 0;
const unsigned int points = len / 16;
@ -394,7 +394,11 @@ void srslte_vec_sc_div2_sss_avx(short *x, int k, short *z, uint32_t len)
/* No improvement with AVX */
void srslte_vec_lut_sss_sse(short *x, unsigned short *lut, short *y, uint32_t len)
{
#ifndef DEBUG_MODE
#ifdef DEBUG_MODE
for (int i=0;i<len;i++) {
y[lut[i]] = x[i];
}
#else
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int points = len / 8;
@ -466,7 +470,7 @@ void srslte_vec_convert_fi_sse(float *x, int16_t *z, float scale, uint32_t len)
// for enb no-volk
void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len) {
void srslte_vec_sum_fff_sse(float *x, float *y, float *z, uint32_t len) {
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int points = len / 4;
@ -497,7 +501,7 @@ void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len) {
#endif
}
void srslte_vec_sub_fff_simd(float *x, float *y, float *z, uint32_t len) {
void srslte_vec_sub_fff_sse(float *x, float *y, float *z, uint32_t len) {
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int points = len / 4;
@ -550,7 +554,7 @@ static inline __m128 _mm_complexmulconj_ps(__m128 x, __m128 y) {
}
#endif
cf_t srslte_vec_dot_prod_ccc_simd(cf_t *x, cf_t *y, uint32_t len)
cf_t srslte_vec_dot_prod_ccc_sse(cf_t *x, cf_t *y, uint32_t len)
{
cf_t result = 0;
#ifdef LV_HAVE_SSE
@ -591,8 +595,7 @@ cf_t srslte_vec_dot_prod_ccc_simd(cf_t *x, cf_t *y, uint32_t len)
return result;
}
cf_t srslte_vec_dot_prod_conj_ccc_simd(cf_t *x, cf_t *y, uint32_t len)
cf_t srslte_vec_dot_prod_conj_ccc_sse(cf_t *x, cf_t *y, uint32_t len)
{
cf_t result = 0;
#ifdef LV_HAVE_SSE
@ -632,7 +635,8 @@ cf_t srslte_vec_dot_prod_conj_ccc_simd(cf_t *x, cf_t *y, uint32_t len)
#endif
return result;
}
void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len)
void srslte_vec_prod_ccc_sse(cf_t *x,cf_t *y, cf_t *z, uint32_t len)
{
#ifdef LV_HAVE_SSE
unsigned int number = 0;
@ -662,7 +666,7 @@ void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len)
}
void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len) {
void srslte_vec_prod_conj_ccc_sse(cf_t *x,cf_t *y, cf_t *z, uint32_t len) {
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int halfPoints = len / 2;
@ -690,7 +694,7 @@ void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len) {
#endif
}
void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, uint32_t len) {
void srslte_vec_sc_prod_ccc_sse(cf_t *x, cf_t h, cf_t *z, uint32_t len) {
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int halfPoints = len / 2;
@ -724,7 +728,7 @@ void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, uint32_t len) {
}
void srslte_vec_sc_prod_cfc_simd(cf_t *x, float h, cf_t *z, uint32_t len) {
void srslte_vec_sc_prod_cfc_sse(cf_t *x, float h, cf_t *z, uint32_t len) {
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int halfPoints = len / 2;
@ -756,7 +760,7 @@ void srslte_vec_sc_prod_cfc_simd(cf_t *x, float h, cf_t *z, uint32_t len) {
void srslte_vec_sc_prod_fff_simd(float *x, float h, float *z, uint32_t len) {
void srslte_vec_sc_prod_fff_sse(float *x, float h, float *z, uint32_t len) {
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int quarterPoints = len / 4;
@ -786,8 +790,7 @@ void srslte_vec_sc_prod_fff_simd(float *x, float h, float *z, uint32_t len) {
#endif
}
void srslte_vec_abs_square_cf_simd(cf_t *x, float *z, uint32_t len) {
void srslte_vec_abs_square_cf_sse(cf_t *x, float *z, uint32_t len) {
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int quarterPoints = len / 4;

Binary file not shown.

Before

Width:  |  Height:  |  Size: 435 KiB

View File

@ -28,25 +28,22 @@ qci_config = (
{
qci=9;
pdcp_config = {
discard_timer = -1;
status_report_required = false;
discard_timer = 100;
pdcp_sn_size = 12;
}
rlc_config = {
ul_am = {
t_poll_retx = 200;
poll_pdu = 16;
poll_byte = -1;
max_retx_thresh = 8;
ul_um = {
sn_field_length = 10;
};
dl_am = {
t_reordering = 80;
t_status_prohibit = 35;
dl_um = {
sn_field_length = 10;
t_reordering = 80;
};
};
logical_channel_config = {
priority = 3;
prioritized_bit_rate = 8;
bucket_size_duration = 50;
priority = 11;
prioritized_bit_rate = -1;
bucket_size_duration = 100;
log_chan_group = 3;
};
}

View File

@ -143,7 +143,7 @@ nof_ctrl_symbols = 2
#pregenerate_signals = false
#tx_amplitude = 0.8
#link_failure_nof_err = 50
#rrc_inactivity_timer = 5000
#rrc_inactivity_timer = 30000
#max_prach_offset_us = 30
#####################################################################

View File

@ -176,7 +176,7 @@ void parse_args(all_args_t *args, int argc, char* argv[]) {
"Chooses the coefficients for the 3-tap channel estimator centered filter.")
("expert.rrc_inactivity_timer",
bpo::value<uint32_t>(&args->expert.rrc_inactivity_timer)->default_value(5000),
bpo::value<uint32_t>(&args->expert.rrc_inactivity_timer)->default_value(30000),
"Inactivity timer in ms")

View File

@ -995,6 +995,7 @@ bool rrc::ue::release_erabs()
void rrc::ue::notify_s1ap_ue_ctxt_setup_complete()
{
LIBLTE_S1AP_MESSAGE_INITIALCONTEXTSETUPRESPONSE_STRUCT res;
res.ext = false;
res.E_RABSetupListCtxtSURes.len = 0;
res.E_RABFailedToSetupListCtxtSURes.len = 0;

View File

@ -46,7 +46,7 @@ endif (RPATH)
########################################################################
if (NOT ${BUILDUE_CMD} STREQUAL "")
message(STATUS "Added custom post-build-UE command: ${BUILDUE_CMD}")
add_custom_command(TARGET ue POST_BUILD COMMAND ${BUILDUE_CMD})
add_custom_command(TARGET srsue POST_BUILD COMMAND ${BUILDUE_CMD})
else(NOT ${BUILDUE_CMD} STREQUAL "")
message(STATUS "No post-build-UE command defined")
endif (NOT ${BUILDUE_CMD} STREQUAL "")

View File

@ -49,284 +49,289 @@ namespace bpo = boost::program_options;
***********************************************************************/
string config_file;
void parse_args(all_args_t *args, int argc, char* argv[]) {
void parse_args(all_args_t *args, int argc, char *argv[]) {
// Command line only options
bpo::options_description general("General options");
general.add_options()
("help,h", "Produce help message")
("version,v", "Print version information and exit")
;
// Command line only options
bpo::options_description general("General options");
// Command line or config file options
bpo::options_description common("Configuration options");
common.add_options()
("rf.dl_freq", bpo::value<float>(&args->rf.dl_freq)->default_value(2680000000), "Downlink centre frequency")
("rf.ul_freq", bpo::value<float>(&args->rf.ul_freq)->default_value(2560000000), "Uplink centre frequency")
("rf.rx_gain", bpo::value<float>(&args->rf.rx_gain)->default_value(-1), "Front-end receiver gain")
("rf.tx_gain", bpo::value<float>(&args->rf.tx_gain)->default_value(-1), "Front-end transmitter gain")
("rf.nof_rx_ant", bpo::value<uint32_t>(&args->rf.nof_rx_ant)->default_value(1), "Number of RX antennas")
general.add_options()
("help,h", "Produce help message")
("version,v", "Print version information and exit");
("rf.device_name", bpo::value<string>(&args->rf.device_name)->default_value("auto"), "Front-end device name")
("rf.device_args", bpo::value<string>(&args->rf.device_args)->default_value("auto"), "Front-end device arguments")
("rf.time_adv_nsamples", bpo::value<string>(&args->rf.time_adv_nsamples)->default_value("auto"), "Transmission time advance")
("rf.burst_preamble_us", bpo::value<string>(&args->rf.burst_preamble)->default_value("auto"), "Transmission time advance")
// Command line or config file options
bpo::options_description common("Configuration options");
common.add_options()
("rf.dl_freq", bpo::value<float>(&args->rf.dl_freq)->default_value(2680000000), "Downlink centre frequency")
("rf.ul_freq", bpo::value<float>(&args->rf.ul_freq)->default_value(2560000000), "Uplink centre frequency")
("rf.rx_gain", bpo::value<float>(&args->rf.rx_gain)->default_value(-1), "Front-end receiver gain")
("rf.tx_gain", bpo::value<float>(&args->rf.tx_gain)->default_value(-1), "Front-end transmitter gain")
("rf.nof_rx_ant", bpo::value<uint32_t>(&args->rf.nof_rx_ant)->default_value(1), "Number of RX antennas")
("pcap.enable", bpo::value<bool>(&args->pcap.enable)->default_value(false), "Enable MAC packet captures for wireshark")
("pcap.filename", bpo::value<string>(&args->pcap.filename)->default_value("ue.pcap"), "MAC layer capture filename")
("rf.device_name", bpo::value<string>(&args->rf.device_name)->default_value("auto"), "Front-end device name")
("rf.device_args", bpo::value<string>(&args->rf.device_args)->default_value("auto"), "Front-end device arguments")
("rf.time_adv_nsamples", bpo::value<string>(&args->rf.time_adv_nsamples)->default_value("auto"),
"Transmission time advance")
("rf.burst_preamble_us", bpo::value<string>(&args->rf.burst_preamble)->default_value("auto"),
"Transmission time advance")
("trace.enable", bpo::value<bool>(&args->trace.enable)->default_value(false), "Enable PHY and radio timing traces")
("trace.phy_filename",bpo::value<string>(&args->trace.phy_filename)->default_value("ue.phy_trace"), "PHY timing traces filename")
("trace.radio_filename",bpo::value<string>(&args->trace.radio_filename)->default_value("ue.radio_trace"), "Radio timing traces filename")
("pcap.enable", bpo::value<bool>(&args->pcap.enable)->default_value(false),
"Enable MAC packet captures for wireshark")
("pcap.filename", bpo::value<string>(&args->pcap.filename)->default_value("ue.pcap"), "MAC layer capture filename")
("gui.enable", bpo::value<bool>(&args->gui.enable)->default_value(false), "Enable GUI plots")
("log.phy_level", bpo::value<string>(&args->log.phy_level), "PHY log level")
("log.phy_hex_limit", bpo::value<int>(&args->log.phy_hex_limit), "PHY log hex dump limit")
("log.mac_level", bpo::value<string>(&args->log.mac_level), "MAC log level")
("log.mac_hex_limit", bpo::value<int>(&args->log.mac_hex_limit), "MAC log hex dump limit")
("log.rlc_level", bpo::value<string>(&args->log.rlc_level), "RLC log level")
("log.rlc_hex_limit", bpo::value<int>(&args->log.rlc_hex_limit), "RLC log hex dump limit")
("log.pdcp_level", bpo::value<string>(&args->log.pdcp_level), "PDCP log level")
("log.pdcp_hex_limit",bpo::value<int>(&args->log.pdcp_hex_limit), "PDCP log hex dump limit")
("log.rrc_level", bpo::value<string>(&args->log.rrc_level), "RRC log level")
("log.rrc_hex_limit", bpo::value<int>(&args->log.rrc_hex_limit), "RRC log hex dump limit")
("log.gw_level", bpo::value<string>(&args->log.gw_level), "GW log level")
("log.gw_hex_limit", bpo::value<int>(&args->log.gw_hex_limit), "GW log hex dump limit")
("log.nas_level", bpo::value<string>(&args->log.nas_level), "NAS log level")
("log.nas_hex_limit", bpo::value<int>(&args->log.nas_hex_limit), "NAS log hex dump limit")
("log.usim_level", bpo::value<string>(&args->log.usim_level), "USIM log level")
("log.usim_hex_limit",bpo::value<int>(&args->log.usim_hex_limit), "USIM log hex dump limit")
("trace.enable", bpo::value<bool>(&args->trace.enable)->default_value(false), "Enable PHY and radio timing traces")
("trace.phy_filename", bpo::value<string>(&args->trace.phy_filename)->default_value("ue.phy_trace"),
"PHY timing traces filename")
("trace.radio_filename", bpo::value<string>(&args->trace.radio_filename)->default_value("ue.radio_trace"),
"Radio timing traces filename")
("log.all_level", bpo::value<string>(&args->log.all_level)->default_value("info"), "ALL log level")
("log.all_hex_limit", bpo::value<int>(&args->log.all_hex_limit)->default_value(32), "ALL log hex dump limit")
("gui.enable", bpo::value<bool>(&args->gui.enable)->default_value(false), "Enable GUI plots")
("log.filename", bpo::value<string>(&args->log.filename)->default_value("/tmp/ue.log"),"Log filename")
("log.phy_level", bpo::value<string>(&args->log.phy_level), "PHY log level")
("log.phy_hex_limit", bpo::value<int>(&args->log.phy_hex_limit), "PHY log hex dump limit")
("log.mac_level", bpo::value<string>(&args->log.mac_level), "MAC log level")
("log.mac_hex_limit", bpo::value<int>(&args->log.mac_hex_limit), "MAC log hex dump limit")
("log.rlc_level", bpo::value<string>(&args->log.rlc_level), "RLC log level")
("log.rlc_hex_limit", bpo::value<int>(&args->log.rlc_hex_limit), "RLC log hex dump limit")
("log.pdcp_level", bpo::value<string>(&args->log.pdcp_level), "PDCP log level")
("log.pdcp_hex_limit", bpo::value<int>(&args->log.pdcp_hex_limit), "PDCP log hex dump limit")
("log.rrc_level", bpo::value<string>(&args->log.rrc_level), "RRC log level")
("log.rrc_hex_limit", bpo::value<int>(&args->log.rrc_hex_limit), "RRC log hex dump limit")
("log.gw_level", bpo::value<string>(&args->log.gw_level), "GW log level")
("log.gw_hex_limit", bpo::value<int>(&args->log.gw_hex_limit), "GW log hex dump limit")
("log.nas_level", bpo::value<string>(&args->log.nas_level), "NAS log level")
("log.nas_hex_limit", bpo::value<int>(&args->log.nas_hex_limit), "NAS log hex dump limit")
("log.usim_level", bpo::value<string>(&args->log.usim_level), "USIM log level")
("log.usim_hex_limit", bpo::value<int>(&args->log.usim_hex_limit), "USIM log hex dump limit")
("usim.algo", bpo::value<string>(&args->usim.algo), "USIM authentication algorithm")
("usim.op", bpo::value<string>(&args->usim.op), "USIM operator variant")
("usim.amf", bpo::value<string>(&args->usim.amf), "USIM authentication management field")
("usim.imsi", bpo::value<string>(&args->usim.imsi), "USIM IMSI")
("usim.imei", bpo::value<string>(&args->usim.imei), "USIM IMEI")
("usim.k", bpo::value<string>(&args->usim.k), "USIM K")
/* Expert section */
("expert.phy.worker_cpu_mask",
bpo::value<int>(&args->expert.phy.worker_cpu_mask)->default_value(-1),
"cpu bit mask (eg 255 = 1111 1111)")
("expert.phy.sync_cpu_affinity",
bpo::value<int>(&args->expert.phy.sync_cpu_affinity)->default_value(-1),
"index of the core used by the sync thread")
("expert.ue_category",
bpo::value<int>(&args->expert.ue_cateogry)->default_value(4),
"UE Category (1 to 5)")
("log.all_level", bpo::value<string>(&args->log.all_level)->default_value("info"), "ALL log level")
("log.all_hex_limit", bpo::value<int>(&args->log.all_hex_limit)->default_value(32), "ALL log hex dump limit")
("expert.metrics_period_secs",
bpo::value<float>(&args->expert.metrics_period_secs)->default_value(1.0),
"Periodicity for metrics in seconds")
("log.filename", bpo::value<string>(&args->log.filename)->default_value("/tmp/ue.log"), "Log filename")
("expert.pregenerate_signals",
bpo::value<bool>(&args->expert.pregenerate_signals)->default_value(false),
"Pregenerate uplink signals after attach. Improves CPU performance.")
("expert.rssi_sensor_enabled",
bpo::value<bool>(&args->expert.phy.rssi_sensor_enabled)->default_value(true),
"Enable or disable RF frontend RSSI sensor. In some USRP devices can cause segmentation fault")
("expert.prach_gain",
bpo::value<float>(&args->expert.phy.prach_gain)->default_value(-1.0),
"Disable PRACH power control")
("expert.cqi_max",
bpo::value<int>(&args->expert.phy.cqi_max)->default_value(15),
"Upper bound on the maximum CQI to be reported. Default 15.")
("expert.cqi_fixed",
bpo::value<int>(&args->expert.phy.cqi_fixed)->default_value(-1),
"Fixes the reported CQI to a constant value. Default disabled.")
("expert.snr_ema_coeff",
bpo::value<float>(&args->expert.phy.snr_ema_coeff)->default_value(0.1),
"Sets the SNR exponential moving average coefficient (Default 0.1)")
("expert.snr_estim_alg",
bpo::value<string>(&args->expert.phy.snr_estim_alg)->default_value("refs"),
"Sets the noise estimation algorithm. (Default refs)")
("expert.pdsch_max_its",
bpo::value<int>(&args->expert.phy.pdsch_max_its)->default_value(4),
"Maximum number of turbo decoder iterations")
("usim.algo", bpo::value<string>(&args->usim.algo), "USIM authentication algorithm")
("usim.op", bpo::value<string>(&args->usim.op), "USIM operator variant")
("usim.amf", bpo::value<string>(&args->usim.amf), "USIM authentication management field")
("usim.imsi", bpo::value<string>(&args->usim.imsi), "USIM IMSI")
("usim.imei", bpo::value<string>(&args->usim.imei), "USIM IMEI")
("usim.k", bpo::value<string>(&args->usim.k), "USIM K")
("expert.attach_enable_64qam",
bpo::value<bool>(&args->expert.phy.attach_enable_64qam)->default_value(false),
"PUSCH 64QAM modulation before attachment")
("expert.nof_phy_threads",
bpo::value<int>(&args->expert.phy.nof_phy_threads)->default_value(2),
"Number of PHY threads")
("expert.equalizer_mode",
bpo::value<string>(&args->expert.phy.equalizer_mode)->default_value("mmse"),
"Equalizer mode")
("expert.cfo_integer_enabled",
bpo::value<bool>(&args->expert.phy.cfo_integer_enabled)->default_value(false),
"Enables integer CFO estimation and correction.")
("expert.cfo_correct_tol_hz",
bpo::value<float>(&args->expert.phy.cfo_correct_tol_hz)->default_value(50.0),
"Tolerance (in Hz) for digial CFO compensation.")
("expert.time_correct_period",
bpo::value<int>(&args->expert.phy.time_correct_period)->default_value(5),
"Period for sampling time offset correction.")
("expert.sfo_correct_disable",
bpo::value<bool>(&args->expert.phy.sfo_correct_disable)->default_value(false),
"Disables phase correction before channel estimation.")
("expert.sss_algorithm",
bpo::value<string>(&args->expert.phy.sss_algorithm)->default_value("full"),
"Selects the SSS estimation algorithm.")
("expert.estimator_fil_w",
bpo::value<float>(&args->expert.phy.estimator_fil_w)->default_value(0.1),
"Chooses the coefficients for the 3-tap channel estimator centered filter.")
("rf_calibration.tx_corr_dc_gain", bpo::value<float>(&args->rf_cal.tx_corr_dc_gain)->default_value(0.0), "TX DC offset gain correction")
("rf_calibration.tx_corr_dc_phase", bpo::value<float>(&args->rf_cal.tx_corr_dc_phase)->default_value(0.0), "TX DC offset phase correction")
("rf_calibration.tx_corr_iq_i", bpo::value<float>(&args->rf_cal.tx_corr_iq_i)->default_value(0.0), "TX IQ imbalance inphase correction")
("rf_calibration.tx_corr_iq_q", bpo::value<float>(&args->rf_cal.tx_corr_iq_q)->default_value(0.0), "TX IQ imbalance quadrature correction")
;
// Positional options - config file location
bpo::options_description position("Positional options");
position.add_options()
("config_file", bpo::value< string >(&config_file), "UE configuration file")
;
bpo::positional_options_description p;
p.add("config_file", -1);
/* Expert section */
("expert.phy.worker_cpu_mask",
bpo::value<int>(&args->expert.phy.worker_cpu_mask)->default_value(-1),
"cpu bit mask (eg 255 = 1111 1111)")
// these options are allowed on the command line
bpo::options_description cmdline_options;
cmdline_options.add(common).add(position).add(general);
("expert.phy.sync_cpu_affinity",
bpo::value<int>(&args->expert.phy.sync_cpu_affinity)->default_value(-1),
"index of the core used by the sync thread")
// parse the command line and store result in vm
bpo::variables_map vm;
bpo::store(bpo::command_line_parser(argc, argv).options(cmdline_options).positional(p).run(), vm);
("expert.ue_category",
bpo::value<int>(&args->expert.ue_cateogry)->default_value(4),
"UE Category (1 to 5)")
("expert.metrics_period_secs",
bpo::value<float>(&args->expert.metrics_period_secs)->default_value(1.0),
"Periodicity for metrics in seconds")
("expert.pregenerate_signals",
bpo::value<bool>(&args->expert.pregenerate_signals)->default_value(false),
"Pregenerate uplink signals after attach. Improves CPU performance.")
("expert.rssi_sensor_enabled",
bpo::value<bool>(&args->expert.phy.rssi_sensor_enabled)->default_value(true),
"Enable or disable RF frontend RSSI sensor. In some USRP devices can cause segmentation fault")
("expert.prach_gain",
bpo::value<float>(&args->expert.phy.prach_gain)->default_value(-1.0),
"Disable PRACH power control")
("expert.cqi_max",
bpo::value<int>(&args->expert.phy.cqi_max)->default_value(15),
"Upper bound on the maximum CQI to be reported. Default 15.")
("expert.cqi_fixed",
bpo::value<int>(&args->expert.phy.cqi_fixed)->default_value(-1),
"Fixes the reported CQI to a constant value. Default disabled.")
("expert.snr_ema_coeff",
bpo::value<float>(&args->expert.phy.snr_ema_coeff)->default_value(0.1),
"Sets the SNR exponential moving average coefficient (Default 0.1)")
("expert.snr_estim_alg",
bpo::value<string>(&args->expert.phy.snr_estim_alg)->default_value("refs"),
"Sets the noise estimation algorithm. (Default refs)")
("expert.pdsch_max_its",
bpo::value<int>(&args->expert.phy.pdsch_max_its)->default_value(4),
"Maximum number of turbo decoder iterations")
("expert.attach_enable_64qam",
bpo::value<bool>(&args->expert.phy.attach_enable_64qam)->default_value(false),
"PUSCH 64QAM modulation before attachment")
("expert.nof_phy_threads",
bpo::value<int>(&args->expert.phy.nof_phy_threads)->default_value(2),
"Number of PHY threads")
("expert.equalizer_mode",
bpo::value<string>(&args->expert.phy.equalizer_mode)->default_value("mmse"),
"Equalizer mode")
("expert.cfo_integer_enabled",
bpo::value<bool>(&args->expert.phy.cfo_integer_enabled)->default_value(false),
"Enables integer CFO estimation and correction.")
("expert.cfo_correct_tol_hz",
bpo::value<float>(&args->expert.phy.cfo_correct_tol_hz)->default_value(50.0),
"Tolerance (in Hz) for digial CFO compensation.")
("expert.time_correct_period",
bpo::value<int>(&args->expert.phy.time_correct_period)->default_value(5),
"Period for sampling time offset correction.")
("expert.sfo_correct_disable",
bpo::value<bool>(&args->expert.phy.sfo_correct_disable)->default_value(false),
"Disables phase correction before channel estimation.")
("expert.sss_algorithm",
bpo::value<string>(&args->expert.phy.sss_algorithm)->default_value("full"),
"Selects the SSS estimation algorithm.")
("expert.estimator_fil_w",
bpo::value<float>(&args->expert.phy.estimator_fil_w)->default_value(0.1),
"Chooses the coefficients for the 3-tap channel estimator centered filter.")
("rf_calibration.tx_corr_dc_gain", bpo::value<float>(&args->rf_cal.tx_corr_dc_gain)->default_value(0.0),
"TX DC offset gain correction")
("rf_calibration.tx_corr_dc_phase", bpo::value<float>(&args->rf_cal.tx_corr_dc_phase)->default_value(0.0),
"TX DC offset phase correction")
("rf_calibration.tx_corr_iq_i", bpo::value<float>(&args->rf_cal.tx_corr_iq_i)->default_value(0.0),
"TX IQ imbalance inphase correction")
("rf_calibration.tx_corr_iq_q", bpo::value<float>(&args->rf_cal.tx_corr_iq_q)->default_value(0.0),
"TX IQ imbalance quadrature correction");
// Positional options - config file location
bpo::options_description position("Positional options");
position.add_options()
("config_file", bpo::value<string>(&config_file), "UE configuration file");
bpo::positional_options_description p;
p.add("config_file", -1);
// these options are allowed on the command line
bpo::options_description cmdline_options;
cmdline_options.add(common).add(position).add(general);
// parse the command line and store result in vm
bpo::variables_map vm;
bpo::store(bpo::command_line_parser(argc, argv).options(cmdline_options).positional(p).run(), vm);
bpo::notify(vm);
// help option was given - print usage and exit
if (vm.count("help")) {
cout << "Usage: " << argv[0] << " [OPTIONS] config_file" << endl << endl;
cout << common << endl << general << endl;
exit(0);
}
// print version number and exit
if (vm.count("version")) {
cout << "Version " <<
srslte_get_version_major() << "." <<
srslte_get_version_minor() << "." <<
srslte_get_version_patch() << endl;
exit(0);
}
// no config file given - print usage and exit
if (!vm.count("config_file")) {
cout << "Error: Configuration file not provided" << endl;
cout << "Usage: " << argv[0] << " [OPTIONS] config_file" << endl << endl;
exit(0);
} else {
cout << "Reading configuration file " << config_file << "..." << endl;
ifstream conf(config_file.c_str(), ios::in);
if (conf.fail()) {
cout << "Failed to read configuration file " << config_file << " - exiting" << endl;
exit(1);
}
bpo::store(bpo::parse_config_file(conf, common), vm);
bpo::notify(vm);
}
// help option was given - print usage and exit
if (vm.count("help")) {
cout << "Usage: " << argv[0] << " [OPTIONS] config_file" << endl << endl;
cout << common << endl << general << endl;
exit(0);
// Apply all_level to any unset layers
if (vm.count("log.all_level")) {
if (!vm.count("log.phy_level")) {
args->log.phy_level = args->log.all_level;
}
if (!vm.count("log.mac_level")) {
args->log.mac_level = args->log.all_level;
}
if (!vm.count("log.rlc_level")) {
args->log.rlc_level = args->log.all_level;
}
if (!vm.count("log.pdcp_level")) {
args->log.pdcp_level = args->log.all_level;
}
if (!vm.count("log.rrc_level")) {
args->log.rrc_level = args->log.all_level;
}
if (!vm.count("log.nas_level")) {
args->log.nas_level = args->log.all_level;
}
if (!vm.count("log.gw_level")) {
args->log.gw_level = args->log.all_level;
}
if (!vm.count("log.usim_level")) {
args->log.usim_level = args->log.all_level;
}
}
// print version number and exit
if (vm.count("version")) {
cout << "Version " <<
srslte_get_version_major() << "." <<
srslte_get_version_minor() << "." <<
srslte_get_version_patch() << endl;
exit(0);
}
// no config file given - print usage and exit
if (!vm.count("config_file")) {
cout << "Error: Configuration file not provided" << endl;
cout << "Usage: " << argv[0] << " [OPTIONS] config_file" << endl << endl;
exit(0);
} else {
cout << "Reading configuration file " << config_file << "..." << endl;
ifstream conf(config_file.c_str(), ios::in);
if(conf.fail()) {
cout << "Failed to read configuration file " << config_file << " - exiting" << endl;
exit(1);
}
bpo::store(bpo::parse_config_file(conf, common), vm);
bpo::notify(vm);
// Apply all_hex_limit to any unset layers
if (vm.count("log.all_hex_limit")) {
if (!vm.count("log.phy_hex_limit")) {
args->log.phy_hex_limit = args->log.all_hex_limit;
}
// Apply all_level to any unset layers
if (vm.count("log.all_level")) {
if(!vm.count("log.phy_level")) {
args->log.phy_level = args->log.all_level;
}
if(!vm.count("log.mac_level")) {
args->log.mac_level = args->log.all_level;
}
if(!vm.count("log.rlc_level")) {
args->log.rlc_level = args->log.all_level;
}
if(!vm.count("log.pdcp_level")) {
args->log.pdcp_level = args->log.all_level;
}
if(!vm.count("log.rrc_level")) {
args->log.rrc_level = args->log.all_level;
}
if(!vm.count("log.nas_level")) {
args->log.nas_level = args->log.all_level;
}
if(!vm.count("log.gw_level")) {
args->log.gw_level = args->log.all_level;
}
if(!vm.count("log.usim_level")) {
args->log.usim_level = args->log.all_level;
}
if (!vm.count("log.mac_hex_limit")) {
args->log.mac_hex_limit = args->log.all_hex_limit;
}
// Apply all_hex_limit to any unset layers
if (vm.count("log.all_hex_limit")) {
if(!vm.count("log.phy_hex_limit")) {
args->log.phy_hex_limit = args->log.all_hex_limit;
}
if(!vm.count("log.mac_hex_limit")) {
args->log.mac_hex_limit = args->log.all_hex_limit;
}
if(!vm.count("log.rlc_hex_limit")) {
args->log.rlc_hex_limit = args->log.all_hex_limit;
}
if(!vm.count("log.pdcp_hex_limit")) {
args->log.pdcp_hex_limit = args->log.all_hex_limit;
}
if(!vm.count("log.rrc_hex_limit")) {
args->log.rrc_hex_limit = args->log.all_hex_limit;
}
if(!vm.count("log.nas_hex_limit")) {
args->log.nas_hex_limit = args->log.all_hex_limit;
}
if(!vm.count("log.gw_hex_limit")) {
args->log.gw_hex_limit = args->log.all_hex_limit;
}
if(!vm.count("log.usim_hex_limit")) {
args->log.usim_hex_limit = args->log.all_hex_limit;
}
if (!vm.count("log.rlc_hex_limit")) {
args->log.rlc_hex_limit = args->log.all_hex_limit;
}
if (!vm.count("log.pdcp_hex_limit")) {
args->log.pdcp_hex_limit = args->log.all_hex_limit;
}
if (!vm.count("log.rrc_hex_limit")) {
args->log.rrc_hex_limit = args->log.all_hex_limit;
}
if (!vm.count("log.nas_hex_limit")) {
args->log.nas_hex_limit = args->log.all_hex_limit;
}
if (!vm.count("log.gw_hex_limit")) {
args->log.gw_hex_limit = args->log.all_hex_limit;
}
if (!vm.count("log.usim_hex_limit")) {
args->log.usim_hex_limit = args->log.all_hex_limit;
}
}
}
static bool running = true;
static bool running = true;
static bool do_metrics = false;
void sig_int_handler(int signo)
{
void sig_int_handler(int signo) {
running = false;
}
void *input_loop(void *m)
{
metrics_stdout *metrics = (metrics_stdout*)m;
void *input_loop(void *m) {
metrics_stdout *metrics = (metrics_stdout *) m;
char key;
while(running) {
while (running) {
cin >> key;
if('t' == key) {
if ('t' == key) {
do_metrics = !do_metrics;
if(do_metrics) {
if (do_metrics) {
cout << "Enter t to stop trace." << endl;
} else {
cout << "Enter t to restart trace." << endl;
@ -337,17 +342,16 @@ void *input_loop(void *m)
return NULL;
}
int main(int argc, char *argv[])
{
int main(int argc, char *argv[]) {
signal(SIGINT, sig_int_handler);
all_args_t args;
all_args_t args;
metrics_stdout metrics;
ue *ue = ue::get_instance();
ue *ue = ue::get_instance();
cout << "--- Software Radio Systems LTE UE ---" << endl << endl;
parse_args(&args, argc, argv);
if(!ue->init(&args)) {
if (!ue->init(&args)) {
exit(1);
}
metrics.init(ue, args.expert.metrics_period_secs);
@ -355,17 +359,17 @@ int main(int argc, char *argv[])
pthread_t input;
pthread_create(&input, NULL, &input_loop, &metrics);
bool plot_started = false;
bool signals_pregenerated = false;
while(running) {
bool plot_started = false;
bool signals_pregenerated = false;
while (running) {
if (ue->is_attached()) {
if (!signals_pregenerated && args.expert.pregenerate_signals) {
ue->pregenerate_signals(true);
signals_pregenerated = true;
signals_pregenerated = true;
}
if (!plot_started && args.gui.enable) {
ue->start_plot();
plot_started = true;
plot_started = true;
}
}
sleep(1);