phy/tetra_burst.c: use bitwise operations to speed up synchronization

Finding the synchronization sequence eats several times more CPU time than the
actual decoding. This is especially pronounced on channels with lots of errors
(where synchronization is lost frequently) and on channels that are empty most
of the time (such as uplink channels, support for which is coming in the
following patches).

Profiling shows that all of that time is spent in memcmp calls.

A more complicated, more efficient algorithm, e.g. Aho-Corasick, turned out not
to be necessary. Compilers can optimize even a simple bit filter into fast code.

This provides only a modest (~25 %) performance gain; more fixes are coming.

Fixes: OS#1897
Change-Id: I3b90cc70c2ec67253a0fd2f00c6957a80971c38b
This commit is contained in:
Jan Hrach 2018-07-30 21:59:54 +02:00 committed by Harald Welte
parent 013ef6e8a1
commit 22bb16dfa4
1 changed files with 30 additions and 0 deletions

View File

@ -269,9 +269,39 @@ int build_norm_c_d_burst(uint8_t *buf, const uint8_t *bkn1, const uint8_t *bb, c
int tetra_find_train_seq(const uint8_t *in, unsigned int end_of_in,
uint32_t mask_of_train_seq, unsigned int *offset)
{
static uint32_t tsq_bytes[5];
if (tsq_bytes[0] == 0) {
#define FILTER_LOOKAHEAD_LEN 22
#define FILTER_LOOKAHEAD_MASK ((1<<FILTER_LOOKAHEAD_LEN)-1)
for (int i = 0; i < FILTER_LOOKAHEAD_LEN; i++) {
tsq_bytes[0] = (tsq_bytes[0] << 1) | y_bits[i];
tsq_bytes[1] = (tsq_bytes[1] << 1) | n_bits[i];
tsq_bytes[2] = (tsq_bytes[2] << 1) | p_bits[i];
tsq_bytes[3] = (tsq_bytes[3] << 1) | q_bits[i];
tsq_bytes[4] = (tsq_bytes[4] << 1) | x_bits[i];
}
}
uint32_t filter = 0;
for (int i = 0; i < FILTER_LOOKAHEAD_LEN-2; i++)
filter = (filter << 1) | in[i];
const uint8_t *cur;
for (cur = in; cur < in + end_of_in; cur++) {
filter = ((filter << 1) | cur[FILTER_LOOKAHEAD_LEN-1]) & FILTER_LOOKAHEAD_MASK;
int match = 0;
for (int i = 0; i < 5; i++)
if (filter == tsq_bytes[i])
match = 1;
if (!match)
continue;
int remain_len = (in + end_of_in) - cur;
if (mask_of_train_seq & (1 << TETRA_TRAIN_SYNC) &&