From 621a49eb6999485599cba0fe02f74c23cc55f07f Mon Sep 17 00:00:00 2001 From: Eric Wild Date: Thu, 12 Jan 2023 16:21:58 +0100 Subject: [PATCH] ms: adjust float<->integral type conversion Given integral type A and non-integral type B, and depending on rounding mode, optimization, compiler, and phase of the moon, A(A)*B != A(A*B), so split the two cases. While at it, also make the template automagically work for complex types instead of requiring manual casts. The general idea here is to allow inlining and vectorization by treating all args as plain arrays, which is fine. This works as expected with -mtune=native: x64 implies SSE2, and we do not target any NEON-less ARM versions either. Clang-only array length hints can improve this even more. Change-Id: I93f077f967daf2ed382d12cc20a54846b3688634 --- Transceiver52M/Complex.h | 2 +- Transceiver52M/ms/ms.cpp | 4 +- Transceiver52M/ms/ms.h | 53 +++++++++++++++++++++++--- Transceiver52M/ms/ms_rx_burst_test.cpp | 7 ++-- Transceiver52M/ms/ms_rx_lower.cpp | 6 +-- Transceiver52M/ms/ms_upper.cpp | 7 ++-- 6 files changed, 58 insertions(+), 21 deletions(-) diff --git a/Transceiver52M/Complex.h b/Transceiver52M/Complex.h index 6e72346f..597a26f8 100644 --- a/Transceiver52M/Complex.h +++ b/Transceiver52M/Complex.h @@ -29,7 +29,7 @@ unlike the built-in complex<> templates, these inline most operations for speed template class Complex { public: - + typedef Real value_type; Real r, i; /**@name constructors */ diff --git a/Transceiver52M/ms/ms.cpp b/Transceiver52M/ms/ms.cpp index ddcfc3e1..6f63a737 100644 --- a/Transceiver52M/ms/ms.cpp +++ b/Transceiver52M/ms/ms.cpp @@ -62,7 +62,7 @@ void tx_test(ms_trx *t, ts_hitter_q_t *q, unsigned int *tsc) // float -> int16 blade_sample_type burst_buf[burst->size()]; - convert_and_scale(burst_buf, burst->begin(), burst->size() * 2, 1); + convert_and_scale(burst_buf, burst->begin(), burst->size() * 2, 1); while (1) { GSM::Time target; @@ -126,7 +126,7 @@ void tx_test(ms_trx *t, ts_hitter_q_t *q, unsigned 
int *tsc) t->submit_burst_ts(buf2, burst->size() + pad, send_ts - pad); #ifdef DBGXX signalVector test(burst->size() + pad); - convert_and_scale(test.begin(), buf2, burst->size() * 2 + pad, 1.f / float(scale)); + convert_and_scale(test.begin(), buf2, burst->size() * 2 + pad, 1.f / float(scale)); estim_burst_params ebp; auto det = detectAnyBurst(test, 0, 4, 4, CorrType::RACH, 40, &ebp); if (det > 0) diff --git a/Transceiver52M/ms/ms.h b/Transceiver52M/ms/ms.h index 54252836..8ea99323 100644 --- a/Transceiver52M/ms/ms.h +++ b/Transceiver52M/ms/ms.h @@ -38,6 +38,7 @@ #error wat? no device.. #endif +#include "Complex.h" #include "GSMCommon.h" #include "itrq.h" @@ -45,7 +46,8 @@ const unsigned int ONE_TS_BURST_LEN = (3 + 58 + 26 + 58 + 3 + 8.25) * 4 /*sps*/; const unsigned int NUM_RXQ_FRAMES = 1; // rx thread <-> upper rx queue const unsigned int SCH_LEN_SPS = (ONE_TS_BURST_LEN * 8 /*ts*/ * 12 /*frames*/); -template void clamp_array(T *start2, unsigned int len, T max) +template +void clamp_array(T *start2, unsigned int len, T max) { for (int i = 0; i < len; i++) { const T t1 = start2[i] < -max ? 
-max : start2[i]; @@ -53,15 +55,54 @@ template void clamp_array(T *start2, unsigned int len, T max) start2[i] = t2; } } -template -void convert_and_scale(void *dst, void *src, unsigned int src_len, ST scale) + +namespace cvt_internal +{ + +template +void convert_and_scale_i(float *dst, const SRC_T *src, unsigned int src_len, ST scale) { for (unsigned int i = 0; i < src_len; i++) - reinterpret_cast(dst)[i] = static_cast((reinterpret_cast(src)[i])) * scale; + dst[i] = static_cast(src[i]) * scale; } -template void convert_and_scale_default(void *dst, void *src, unsigned int src_len) + +template +void convert_and_scale_i(DST_T *dst, const float *src, unsigned int src_len, ST scale) { - return convert_and_scale(dst, src, src_len, SAMPLE_SCALE_FACTOR); + for (unsigned int i = 0; i < src_len; i++) + dst[i] = static_cast(src[i] * scale); +} + +template +void convert_and_scale_i(float *dst, const float *src, unsigned int src_len, ST scale) +{ + for (unsigned int i = 0; i < src_len; i++) + dst[i] = src[i] * scale; +} + +template +struct is_complex : std::false_type { + using baset = T; +}; + +template +struct is_complex> : std::true_type { + using baset = typename std::complex::value_type; +}; + +template +struct is_complex> : std::true_type { + using baset = typename Complex::value_type; +}; + +} // namespace cvt_internal + +template +void convert_and_scale(DST_T *dst, const SRC_T *src, unsigned int src_len, ST scale) +{ + using vd = typename cvt_internal::is_complex::baset; + using vs = typename cvt_internal::is_complex::baset; + return cvt_internal::convert_and_scale_i((vd *)dst, (vs *)src, src_len, scale); } struct one_burst { diff --git a/Transceiver52M/ms/ms_rx_burst_test.cpp b/Transceiver52M/ms/ms_rx_burst_test.cpp index 70183016..c3ba4ee1 100644 --- a/Transceiver52M/ms/ms_rx_burst_test.cpp +++ b/Transceiver52M/ms/ms_rx_burst_test.cpp @@ -90,7 +90,7 @@ static void handle_it(one_burst &e, signalVector &burst, unsigned int tsc, int s if (is_sch) { char outbin[148]; - 
convert_and_scale_default(burst.begin(), e.burst, ONE_TS_BURST_LEN * 2); + convert_and_scale(burst.begin(), e.burst, ONE_TS_BURST_LEN * 2, SAMPLE_SCALE_FACTOR); std::stringstream dbgout; #if 0 { @@ -109,8 +109,7 @@ static void handle_it(one_burst &e, signalVector &burst, unsigned int tsc, int s } #endif { - convert_and_scale(burst.begin(), burst.begin(), ONE_TS_BURST_LEN * 2, - 1.f / float(scale)); + convert_and_scale(burst.begin(), burst.begin(), ONE_TS_BURST_LEN * 2, 1.f / float(scale)); std::complex channel_imp_resp[CHAN_IMP_RESP_LENGTH * d_OSR]; auto ss = reinterpret_cast *>(burst.begin()); @@ -133,7 +132,7 @@ static void handle_it(one_burst &e, signalVector &burst, unsigned int tsc, int s return; } #if 1 - convert_and_scale(burst.begin(), e.burst, ONE_TS_BURST_LEN * 2, 1.f / float(scale)); + convert_and_scale(burst.begin(), e.burst, ONE_TS_BURST_LEN * 2, 1.f / float(scale)); // std::cerr << "@" << tsc << " " << e.gsmts.FN() << ":" << e.gsmts.TN() << " " << ebp.toa << " " // << std::endl; diff --git a/Transceiver52M/ms/ms_rx_lower.cpp b/Transceiver52M/ms/ms_rx_lower.cpp index bec16912..e39d72df 100644 --- a/Transceiver52M/ms/ms_rx_lower.cpp +++ b/Transceiver52M/ms/ms_rx_lower.cpp @@ -180,13 +180,11 @@ bool ms_trx::handle_sch(bool is_first_sch_acq) memset((void *)&sch_acq_buffer[0], 0, sizeof(sch_acq_buffer)); if (is_first_sch_acq) { float max_corr = 0; - convert_and_scale(which_out_buffer, which_in_buffer, buf_len * 2, - 1.f / float(rxFullScale)); + convert_and_scale(which_out_buffer, which_in_buffer, buf_len * 2, 1.f / float(rxFullScale)); start = get_sch_buffer_chan_imp_resp(ss, &channel_imp_resp[0], buf_len, &max_corr); detect_burst(&ss[start], &channel_imp_resp[0], 0, sch_demod_bits); } else { - convert_and_scale(which_out_buffer, which_in_buffer, buf_len * 2, - 1.f / float(rxFullScale)); + convert_and_scale(which_out_buffer, which_in_buffer, buf_len * 2, 1.f / float(rxFullScale)); start = get_sch_chan_imp_resp(ss, &channel_imp_resp[0]); start = start < 39 
? start : 39; start = start > -39 ? start : -39; diff --git a/Transceiver52M/ms/ms_upper.cpp b/Transceiver52M/ms/ms_upper.cpp index 2f3bdc6e..63f5926b 100644 --- a/Transceiver52M/ms/ms_upper.cpp +++ b/Transceiver52M/ms/ms_upper.cpp @@ -191,7 +191,7 @@ bool upper_trx::pullRadioVector(GSM::Time &wTime, int &RSSI, int &timingOffset) return true; } - convert_and_scale(ss, e.burst, ONE_TS_BURST_LEN * 2, 1.f / float(rxFullScale)); + convert_and_scale(ss, e.burst, ONE_TS_BURST_LEN * 2, 1.f / float(rxFullScale)); pow = energyDetect(sv, 20 * 4 /*sps*/); if (pow < -1) { @@ -292,10 +292,10 @@ void upper_trx::driveTx() // float -> int16 blade_sample_type burst_buf[txburst->size()]; - convert_and_scale(burst_buf, txburst->begin(), txburst->size() * 2, 1); + convert_and_scale(burst_buf, txburst->begin(), txburst->size() * 2, 1); #ifdef TXDEBUG auto check = signalVector(txburst->size(), 40); - convert_and_scale(check.begin(), burst_buf, txburst->size() * 2); + convert_and_scale(check.begin(), burst_buf, txburst->size() * 2, 1); estim_burst_params ebp; auto d = detectAnyBurst(check, 2, 4, 4, CorrType::RACH, 40, &ebp); if (d) @@ -462,7 +462,6 @@ int main(int argc, char *argv[]) { auto tall_trxcon_ctx = talloc_init("trxcon context"); signal(SIGPIPE, sighandler); - fesetround(FE_TOWARDZERO); trxcon::msgb_talloc_ctx_init(tall_trxcon_ctx, 0); trxc_log_init(tall_trxcon_ctx);