ms: adjust float<->integral type conversion

Given an integral type A, a value x and a non-integral factor B, A(x)*B is
not guaranteed to equal A(x*B): the result depends on rounding mode,
optimization, compiler, and phase of the moon. So split the two cases.

While at it, also make the template automagically work for complex types
instead of requiring manual casts. The general idea here is to allow
inlining and vectorization by treating all args as plain arrays, which is fine.

This works as expected with -mtune=native: x64 implies SSE2, and we do not
target any NEON-less ARM versions either.

Clang-only array length hints could improve this even more.

Change-Id: I93f077f967daf2ed382d12cc20a54846b3688634
This commit is contained in:
Eric Wild 2023-01-12 16:21:58 +01:00 committed by Eric
parent f538397826
commit 621a49eb69
6 changed files with 58 additions and 21 deletions

View File

@ -29,7 +29,7 @@ unlike the built-in complex<> templates, these inline most operations for speed
template<class Real> class Complex {
public:
typedef Real value_type;
Real r, i;
/**@name constructors */

View File

@ -62,7 +62,7 @@ void tx_test(ms_trx *t, ts_hitter_q_t *q, unsigned int *tsc)
// float -> int16
blade_sample_type burst_buf[burst->size()];
convert_and_scale<int16_t, float>(burst_buf, burst->begin(), burst->size() * 2, 1);
convert_and_scale(burst_buf, burst->begin(), burst->size() * 2, 1);
while (1) {
GSM::Time target;
@ -126,7 +126,7 @@ void tx_test(ms_trx *t, ts_hitter_q_t *q, unsigned int *tsc)
t->submit_burst_ts(buf2, burst->size() + pad, send_ts - pad);
#ifdef DBGXX
signalVector test(burst->size() + pad);
convert_and_scale<float, int16_t>(test.begin(), buf2, burst->size() * 2 + pad, 1.f / float(scale));
convert_and_scale(test.begin(), buf2, burst->size() * 2 + pad, 1.f / float(scale));
estim_burst_params ebp;
auto det = detectAnyBurst(test, 0, 4, 4, CorrType::RACH, 40, &ebp);
if (det > 0)

View File

@ -38,6 +38,7 @@
#error wat? no device..
#endif
#include "Complex.h"
#include "GSMCommon.h"
#include "itrq.h"
@ -45,7 +46,8 @@ const unsigned int ONE_TS_BURST_LEN = (3 + 58 + 26 + 58 + 3 + 8.25) * 4 /*sps*/;
const unsigned int NUM_RXQ_FRAMES = 1; // rx thread <-> upper rx queue
const unsigned int SCH_LEN_SPS = (ONE_TS_BURST_LEN * 8 /*ts*/ * 12 /*frames*/);
template <typename T> void clamp_array(T *start2, unsigned int len, T max)
template <typename T>
void clamp_array(T *start2, unsigned int len, T max)
{
for (int i = 0; i < len; i++) {
const T t1 = start2[i] < -max ? -max : start2[i];
@ -53,15 +55,54 @@ template <typename T> void clamp_array(T *start2, unsigned int len, T max)
start2[i] = t2;
}
}
template <typename DST_T, typename SRC_T, typename ST>
void convert_and_scale(void *dst, void *src, unsigned int src_len, ST scale)
namespace cvt_internal
{
// Overload for an arbitrary source element type SRC_T and a float destination.
// Each source element is converted to float first and only afterwards
// multiplied by scale; the product is stored in dst[i].
// src_len is the number of elements read from src and written to dst.
// NOTE(review): this view appears to interleave the pre-change statement
// (the reinterpret_cast line) with the new one; the description above refers
// to the `dst[i] = static_cast<float>(src[i]) * scale;` statement.
template <typename SRC_T, typename ST>
void convert_and_scale_i(float *dst, const SRC_T *src, unsigned int src_len, ST scale)
{
for (unsigned int i = 0; i < src_len; i++)
reinterpret_cast<DST_T *>(dst)[i] = static_cast<DST_T>((reinterpret_cast<SRC_T *>(src)[i])) * scale;
dst[i] = static_cast<float>(src[i]) * scale;
}
template <typename DST_T, typename SRC_T> void convert_and_scale_default(void *dst, void *src, unsigned int src_len)
// Overload for a float source and an arbitrary destination element type DST_T.
// The product src[i] * scale is formed first and the result of that
// multiplication is then converted to DST_T, i.e. the opposite order of the
// overload that converts into float.
// src_len is the number of elements read from src and written to dst.
// NOTE(review): this view appears to interleave a pre-change `return ...`
// statement with the new loop; the description above refers to the loop.
template <typename DST_T, typename ST>
void convert_and_scale_i(DST_T *dst, const float *src, unsigned int src_len, ST scale)
{
return convert_and_scale<DST_T, SRC_T>(dst, src, src_len, SAMPLE_SCALE_FACTOR);
for (unsigned int i = 0; i < src_len; i++)
dst[i] = static_cast<DST_T>(src[i] * scale);
}
// float -> float overload: no element type conversion, only scaling.
// Walks src_len elements front to back; every element is read before the
// matching destination element is written, so passing the same pointer for
// dst and src scales the buffer in place.
template <typename ST>
void convert_and_scale_i(float *dst, const float *src, unsigned int src_len, ST scale)
{
	const float *const src_end = src + src_len;
	while (src != src_end) {
		*dst = *src * scale;
		++dst;
		++src;
	}
}
// Trait describing whether T is a complex sample type.
//  - value (inherited from std::false_type / std::true_type) tells whether T
//    is one of the recognised complex templates;
//  - baset names the scalar type: T itself for non-complex types, the
//    component type for complex ones.

// Primary template: anything that is not a recognised complex type.
template <typename T>
struct is_complex : std::false_type {
	using baset = T;
};

// std::complex<T>: the scalar type is the template argument.
template <typename T>
struct is_complex<std::complex<T>> : std::true_type {
	using baset = T;
};

// Project Complex<T>: its value_type is the template argument as well.
template <typename T>
struct is_complex<Complex<T>> : std::true_type {
	using baset = T;
};
} // namespace cvt_internal
template <typename DST_T, typename SRC_T, typename ST>
void convert_and_scale(DST_T *dst, const SRC_T *src, unsigned int src_len, ST scale)
{
using vd = typename cvt_internal::is_complex<DST_T>::baset;
using vs = typename cvt_internal::is_complex<SRC_T>::baset;
return cvt_internal::convert_and_scale_i((vd *)dst, (vs *)src, src_len, scale);
}
struct one_burst {

View File

@ -90,7 +90,7 @@ static void handle_it(one_burst &e, signalVector &burst, unsigned int tsc, int s
if (is_sch) {
char outbin[148];
convert_and_scale_default<float, int16_t>(burst.begin(), e.burst, ONE_TS_BURST_LEN * 2);
convert_and_scale(burst.begin(), e.burst, ONE_TS_BURST_LEN * 2, SAMPLE_SCALE_FACTOR);
std::stringstream dbgout;
#if 0
{
@ -109,8 +109,7 @@ static void handle_it(one_burst &e, signalVector &burst, unsigned int tsc, int s
}
#endif
{
convert_and_scale<float, float>(burst.begin(), burst.begin(), ONE_TS_BURST_LEN * 2,
1.f / float(scale));
convert_and_scale(burst.begin(), burst.begin(), ONE_TS_BURST_LEN * 2, 1.f / float(scale));
std::complex<float> channel_imp_resp[CHAN_IMP_RESP_LENGTH * d_OSR];
auto ss = reinterpret_cast<std::complex<float> *>(burst.begin());
@ -133,7 +132,7 @@ static void handle_it(one_burst &e, signalVector &burst, unsigned int tsc, int s
return;
}
#if 1
convert_and_scale<float, int16_t>(burst.begin(), e.burst, ONE_TS_BURST_LEN * 2, 1.f / float(scale));
convert_and_scale(burst.begin(), e.burst, ONE_TS_BURST_LEN * 2, 1.f / float(scale));
// std::cerr << "@" << tsc << " " << e.gsmts.FN() << ":" << e.gsmts.TN() << " " << ebp.toa << " "
// << std::endl;

View File

@ -180,13 +180,11 @@ bool ms_trx::handle_sch(bool is_first_sch_acq)
memset((void *)&sch_acq_buffer[0], 0, sizeof(sch_acq_buffer));
if (is_first_sch_acq) {
float max_corr = 0;
convert_and_scale<float, int16_t>(which_out_buffer, which_in_buffer, buf_len * 2,
1.f / float(rxFullScale));
convert_and_scale(which_out_buffer, which_in_buffer, buf_len * 2, 1.f / float(rxFullScale));
start = get_sch_buffer_chan_imp_resp(ss, &channel_imp_resp[0], buf_len, &max_corr);
detect_burst(&ss[start], &channel_imp_resp[0], 0, sch_demod_bits);
} else {
convert_and_scale<float, int16_t>(which_out_buffer, which_in_buffer, buf_len * 2,
1.f / float(rxFullScale));
convert_and_scale(which_out_buffer, which_in_buffer, buf_len * 2, 1.f / float(rxFullScale));
start = get_sch_chan_imp_resp(ss, &channel_imp_resp[0]);
start = start < 39 ? start : 39;
start = start > -39 ? start : -39;

View File

@ -191,7 +191,7 @@ bool upper_trx::pullRadioVector(GSM::Time &wTime, int &RSSI, int &timingOffset)
return true;
}
convert_and_scale<float, int16_t>(ss, e.burst, ONE_TS_BURST_LEN * 2, 1.f / float(rxFullScale));
convert_and_scale(ss, e.burst, ONE_TS_BURST_LEN * 2, 1.f / float(rxFullScale));
pow = energyDetect(sv, 20 * 4 /*sps*/);
if (pow < -1) {
@ -292,10 +292,10 @@ void upper_trx::driveTx()
// float -> int16
blade_sample_type burst_buf[txburst->size()];
convert_and_scale<int16_t, float>(burst_buf, txburst->begin(), txburst->size() * 2, 1);
convert_and_scale(burst_buf, txburst->begin(), txburst->size() * 2, 1);
#ifdef TXDEBUG
auto check = signalVector(txburst->size(), 40);
convert_and_scale<float, int16_t, 1>(check.begin(), burst_buf, txburst->size() * 2);
convert_and_scale(check.begin(), burst_buf, txburst->size() * 2, 1);
estim_burst_params ebp;
auto d = detectAnyBurst(check, 2, 4, 4, CorrType::RACH, 40, &ebp);
if (d)
@ -462,7 +462,6 @@ int main(int argc, char *argv[])
{
auto tall_trxcon_ctx = talloc_init("trxcon context");
signal(SIGPIPE, sighandler);
fesetround(FE_TOWARDZERO);
trxcon::msgb_talloc_ctx_init(tall_trxcon_ctx, 0);
trxc_log_init(tall_trxcon_ctx);