1060 lines
36 KiB
C
1060 lines
36 KiB
C
/*
|
|
* SpanDSP - a series of DSP components for telephony
|
|
*
|
|
* echo.c - An echo cancellor, suitable for electrical and acoustic
|
|
* cancellation. This code does not currently comply with
|
|
* any relevant standards (e.g. G.164/5/7/8). One day....
|
|
*
|
|
* Written by Steve Underwood <steveu@coppice.org>
|
|
*
|
|
* Copyright (C) 2001, 2003 Steve Underwood
|
|
*
|
|
* Based on a bit from here, a bit from there, eye of toad,
|
|
* ear of bat, etc - plus, of course, my own 2 cents.
|
|
*
|
|
* All rights reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License version 2, as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
*
|
|
* $Id: echo.c,v 1.20 2006/12/01 18:00:48 steveu Exp $
|
|
*/
|
|
|
|
/*! \file */
|
|
|
|
/* TODO:
|
|
Finish the echo suppressor option, however nasty suppression may be.
|
|
Add an option to reintroduce side tone at -24dB under appropriate conditions.
|
|
Improve double talk detector (iterative!)
|
|
*/
|
|
|
|
/* We need to differentiate between transmitted energy which will train the echo
|
|
canceller well (voice, white noise, and other broadband sources) and energy
|
|
which will train it badly (supervisory tones, DTMF, whistles, and other
|
|
narrowband sources). There are many ways this might be done. This canceller uses
|
|
a method based on the autocorrelation qualities of the transmitted signal. A rather
|
|
peaky autocorrelation function is a clear sign of a narrowband signal. We only need
|
|
perform the autocorrelation at well spaced intervals, so the compute load is not too
|
|
great. Multiple successive autocorrelation functions with a similar peaky shape are a
|
|
clear indication of a stationary narrowband signal. Using TKEO, it should be possible to
|
|
greatly reduce the compute requirement for narrowband detection. */
|
|
|
|
/* The FIR taps must be adapted as 32 bit values, to get the necessary finesse
|
|
in the adaption process. However, they are applied as 16 bit values (bits 30-15
|
|
of the 32 bit values) in the FIR. For the working 16 bit values, we need 4 sets.
|
|
|
|
3 of the 16 bit sets are used on a rotating basis. Normally the canceller steps
|
|
round these 3 sets at regular intervals. Any time we detect double talk, we can go
|
|
back to the set from two steps ago with reasonable assurance it is a well adapted
|
|
set. We cannot just go back one step, as we may have rotated the sets just before
|
|
double talk or tone was detected, and that set may already be somewhat corrupted.
|
|
|
|
When narrowband energy is detected we need to continue adapting to it, to echo
|
|
cancel it. However, the adaption will almost certainly be going astray. Broadband
|
|
(or even complex sequences of narrowband) energy will normally lead to a well
|
|
trained cancellor, with taps matching the impulse response of the channel.
|
|
For stationary narrowband energy, there is usually has an infinite number of
|
|
alternative tap sets which will cancel it well. A previously well trained set of
|
|
taps will tend to drift amongst the alternatives. When broadband energy resumes, the
|
|
taps may be a total mismatch for the signal, and could even amplify rather than
|
|
attenuate the echo. The solution is to use a fourth set of 16 bit taps. When we first
|
|
detect the narrowband energy we save the oldest of the group of three sets, but do
|
|
not change back to an older set. We let the canceller cancel, and it adaption drift
|
|
while the narrowband energy is present. When we detect the narrowband energy has ceased,
|
|
we switch to using the fourth set of taps which was saved.
|
|
|
|
When we revert to an older set of taps, we must replace both the 16 bit and 32 bit
|
|
working tap sets. The saved 16 bit values are good enough to also be used as a replacement
|
|
for the 32 bit values. We loose the fractions, but they should soon settle down in a
|
|
reasonable way. */
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include <config.h>
|
|
#endif
|
|
|
|
#include <stdlib.h>
|
|
#include <inttypes.h>
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
|
|
#include "celliax_spandsp.h"
|
|
|
|
//#include "spandsp/telephony.h"
|
|
//#include "spandsp/logging.h"
|
|
//#include "spandsp/bit_operations.h"
|
|
//#include "spandsp/echo.h"
|
|
|
|
//#include "bit_operations.h"
|
|
//#include "giova.h"
|
|
|
|
#if !defined(NULL)
|
|
#define NULL (void *) 0
|
|
#endif
|
|
#if !defined(FALSE)
|
|
#define FALSE 0
|
|
#endif
|
|
#if !defined(TRUE)
|
|
#define TRUE (!FALSE)
|
|
#endif
|
|
|
|
#if 0
|
|
#define MIN_TX_POWER_FOR_ADAPTION 64*64
|
|
#define MIN_RX_POWER_FOR_ADAPTION 64*64
|
|
|
|
static int narrowband_detect(echo_can_state_t * ec)
|
|
{
|
|
int k;
|
|
int i;
|
|
float temp;
|
|
float scale;
|
|
float sf[128];
|
|
float f_acf[128];
|
|
int32_t acf[28];
|
|
int score;
|
|
int len = 32;
|
|
int alen = 9;
|
|
|
|
k = ec->curr_pos;
|
|
for (i = 0; i < len; i++) {
|
|
sf[i] = ec->fir_state.history[k++];
|
|
if (k >= 256)
|
|
k = 0;
|
|
}
|
|
for (k = 0; k < alen; k++) {
|
|
temp = 0;
|
|
for (i = k; i < len; i++)
|
|
temp += sf[i] * sf[i - k];
|
|
f_acf[k] = temp;
|
|
}
|
|
scale = 0x1FFFFFFF / f_acf[0];
|
|
for (k = 0; k < alen; k++)
|
|
acf[k] = (int32_t) (f_acf[k] * scale);
|
|
score = 0;
|
|
for (i = 0; i < 9; i++) {
|
|
if (ec->last_acf[i] >= 0 && acf[i] >= 0) {
|
|
if ((ec->last_acf[i] >> 1) < acf[i] && acf[i] < (ec->last_acf[i] << 1))
|
|
score++;
|
|
} else if (ec->last_acf[i] < 0 && acf[i] < 0) {
|
|
if ((ec->last_acf[i] >> 1) > acf[i] && acf[i] > (ec->last_acf[i] << 1))
|
|
score++;
|
|
}
|
|
}
|
|
memcpy(ec->last_acf, acf, alen * sizeof(ec->last_acf[0]));
|
|
return score;
|
|
}
|
|
|
|
static __inline__ void lms_adapt(echo_can_state_t * ec, int factor)
|
|
{
|
|
int i;
|
|
|
|
#if 0
|
|
mmx_t *mmx_taps;
|
|
mmx_t *mmx_coeffs;
|
|
mmx_t *mmx_hist;
|
|
mmx_t mmx;
|
|
|
|
mmx.w[0] = mmx.w[1] = mmx.w[2] = mmx.w[3] = factor;
|
|
mmx_hist = (mmx_t *) & fir->history[fir->curr_pos];
|
|
mmx_taps = (mmx_t *) & fir->taps;
|
|
mmx_coeffs = (mmx_t *) fir->coeffs;
|
|
i = fir->taps;
|
|
movq_m2r(mmx, mm0);
|
|
while (i > 0) {
|
|
movq_m2r(mmx_hist[0], mm1);
|
|
movq_m2r(mmx_taps[0], mm0);
|
|
movq_m2r(mmx_taps[1], mm1);
|
|
movq_r2r(mm1, mm2);
|
|
pmulhw(mm0, mm1);
|
|
pmullw(mm0, mm2);
|
|
|
|
pmaddwd_r2r(mm1, mm0);
|
|
pmaddwd_r2r(mm3, mm2);
|
|
paddd_r2r(mm0, mm4);
|
|
paddd_r2r(mm2, mm4);
|
|
movq_r2m(mm0, mmx_taps[0]);
|
|
movq_r2m(mm1, mmx_taps[0]);
|
|
movq_r2m(mm2, mmx_coeffs[0]);
|
|
mmx_taps += 2;
|
|
mmx_coeffs += 1;
|
|
mmx_hist += 1;
|
|
i -= 4;
|
|
)
|
|
emms();
|
|
#elif 0
|
|
/* Update the FIR taps */
|
|
for (i = ec->taps - 1; i >= 0; i--) {
|
|
/* Leak to avoid the coefficients drifting beyond the ability of the
|
|
adaption process to bring them back under control. */
|
|
ec->fir_taps32[i] -= (ec->fir_taps32[i] >> 23);
|
|
ec->fir_taps32[i] += (ec->fir_state.history[i + ec->curr_pos] * factor);
|
|
ec->latest_correction = (ec->fir_state.history[i + ec->curr_pos] * factor);
|
|
ec->fir_taps16[ec->tap_set][i] = ec->fir_taps32[i] >> 15;
|
|
}
|
|
#else
|
|
int offset1;
|
|
int offset2;
|
|
|
|
/* Update the FIR taps */
|
|
offset2 = ec->curr_pos;
|
|
offset1 = ec->taps - offset2;
|
|
for (i = ec->taps - 1; i >= offset1; i--) {
|
|
ec->fir_taps32[i] += (ec->fir_state.history[i - offset1] * factor);
|
|
ec->fir_taps16[ec->tap_set][i] = (int16_t) (ec->fir_taps32[i] >> 15);
|
|
}
|
|
for (; i >= 0; i--) {
|
|
ec->fir_taps32[i] += (ec->fir_state.history[i + offset2] * factor);
|
|
ec->fir_taps16[ec->tap_set][i] = (int16_t) (ec->fir_taps32[i] >> 15);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/*- End of function --------------------------------------------------------*/
|
|
|
|
#ifdef NOT_NEEDED
|
|
echo_can_state_t *echo_can_create(int len, int adaption_mode)
|
|
{
|
|
echo_can_state_t *ec;
|
|
int i;
|
|
int j;
|
|
|
|
ec = (echo_can_state_t *) malloc(sizeof(*ec));
|
|
if (ec == NULL)
|
|
return NULL;
|
|
memset(ec, 0, sizeof(*ec));
|
|
ec->taps = len;
|
|
ec->curr_pos = ec->taps - 1;
|
|
ec->tap_mask = ec->taps - 1;
|
|
if ((ec->fir_taps32 = (int32_t *) malloc(ec->taps * sizeof(int32_t))) == NULL) {
|
|
free(ec);
|
|
return NULL;
|
|
}
|
|
memset(ec->fir_taps32, 0, ec->taps * sizeof(int32_t));
|
|
for (i = 0; i < 4; i++) {
|
|
if ((ec->fir_taps16[i] = (int16_t *) malloc(ec->taps * sizeof(int16_t))) == NULL) {
|
|
for (j = 0; j < i; j++)
|
|
free(ec->fir_taps16[j]);
|
|
free(ec->fir_taps32);
|
|
free(ec);
|
|
return NULL;
|
|
}
|
|
memset(ec->fir_taps16[i], 0, ec->taps * sizeof(int16_t));
|
|
}
|
|
fir16_create(&ec->fir_state, ec->fir_taps16[0], ec->taps);
|
|
ec->rx_power_threshold = 10000000;
|
|
ec->geigel_max = 0;
|
|
ec->geigel_lag = 0;
|
|
ec->dtd_onset = FALSE;
|
|
ec->tap_set = 0;
|
|
ec->tap_rotate_counter = 1600;
|
|
ec->cng_level = 1000;
|
|
echo_can_adaption_mode(ec, adaption_mode);
|
|
return ec;
|
|
}
|
|
|
|
/*- End of function --------------------------------------------------------*/
|
|
|
|
void echo_can_free(echo_can_state_t * ec)
|
|
{
|
|
int i;
|
|
|
|
fir16_free(&ec->fir_state);
|
|
free(ec->fir_taps32);
|
|
for (i = 0; i < 4; i++)
|
|
free(ec->fir_taps16[i]);
|
|
free(ec);
|
|
}
|
|
|
|
/*- End of function --------------------------------------------------------*/
|
|
|
|
void echo_can_adaption_mode(echo_can_state_t * ec, int adaption_mode)
|
|
{
|
|
ec->adaption_mode = adaption_mode;
|
|
}
|
|
|
|
/*- End of function --------------------------------------------------------*/
|
|
|
|
void echo_can_flush(echo_can_state_t * ec)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < 4; i++)
|
|
ec->tx_power[i] = 0;
|
|
for (i = 0; i < 3; i++)
|
|
ec->rx_power[i] = 0;
|
|
ec->clean_rx_power = 0;
|
|
ec->nonupdate_dwell = 0;
|
|
|
|
fir16_flush(&ec->fir_state);
|
|
ec->fir_state.curr_pos = ec->taps - 1;
|
|
memset(ec->fir_taps32, 0, ec->taps * sizeof(int32_t));
|
|
for (i = 0; i < 4; i++)
|
|
memset(ec->fir_taps16[i], 0, ec->taps * sizeof(int16_t));
|
|
|
|
ec->curr_pos = ec->taps - 1;
|
|
|
|
ec->supp_test1 = 0;
|
|
ec->supp_test2 = 0;
|
|
ec->supp1 = 0;
|
|
ec->supp2 = 0;
|
|
ec->vad = 0;
|
|
ec->cng_level = 1000;
|
|
ec->cng_filter = 0;
|
|
|
|
ec->geigel_max = 0;
|
|
ec->geigel_lag = 0;
|
|
ec->dtd_onset = FALSE;
|
|
ec->tap_set = 0;
|
|
ec->tap_rotate_counter = 1600;
|
|
|
|
ec->latest_correction = 0;
|
|
|
|
memset(ec->last_acf, 0, sizeof(ec->last_acf));
|
|
ec->narrowband_count = 0;
|
|
ec->narrowband_score = 0;
|
|
}
|
|
|
|
/*- End of function --------------------------------------------------------*/
|
|
|
|
int sample_no = 0;
|
|
|
|
int16_t echo_can_update(echo_can_state_t * ec, int16_t tx, int16_t rx)
|
|
{
|
|
int32_t echo_value;
|
|
int clean_rx;
|
|
int nsuppr;
|
|
int score;
|
|
int i;
|
|
|
|
sample_no++;
|
|
ec->latest_correction = 0;
|
|
/* Evaluate the echo - i.e. apply the FIR filter */
|
|
/* Assume the gain of the FIR does not exceed unity. Exceeding unity
|
|
would seem like a rather poor thing for an echo cancellor to do :)
|
|
This means we can compute the result with a total disregard for
|
|
overflows. 16bits x 16bits -> 31bits, so no overflow can occur in
|
|
any multiply. While accumulating we may overflow and underflow the
|
|
32 bit scale often. However, if the gain does not exceed unity,
|
|
everything should work itself out, and the final result will be
|
|
OK, without any saturation logic. */
|
|
/* Overflow is very much possible here, and we do nothing about it because
|
|
of the compute costs */
|
|
/* 16 bit coeffs for the LMS give lousy results (maths good, actual sound
|
|
bad!), but 32 bit coeffs require some shifting. On balance 32 bit seems
|
|
best */
|
|
echo_value = fir16(&ec->fir_state, tx);
|
|
|
|
/* And the answer is..... */
|
|
clean_rx = rx - echo_value;
|
|
//printf("echo is %" PRId32 "\n", echo_value);
|
|
/* That was the easy part. Now we need to adapt! */
|
|
if (ec->nonupdate_dwell > 0)
|
|
ec->nonupdate_dwell--;
|
|
|
|
/* Calculate short term power levels using very simple single pole IIRs */
|
|
/* TODO: Is the nasty modulus approach the fastest, or would a real
|
|
tx*tx power calculation actually be faster? Using the squares
|
|
makes the numbers grow a lot! */
|
|
ec->tx_power[3] += ((abs(tx) - ec->tx_power[3]) >> 5);
|
|
ec->tx_power[2] += ((tx * tx - ec->tx_power[2]) >> 8);
|
|
ec->tx_power[1] += ((tx * tx - ec->tx_power[1]) >> 5);
|
|
ec->tx_power[0] += ((tx * tx - ec->tx_power[0]) >> 3);
|
|
ec->rx_power[1] += ((rx * rx - ec->rx_power[1]) >> 6);
|
|
ec->rx_power[0] += ((rx * rx - ec->rx_power[0]) >> 3);
|
|
ec->clean_rx_power += ((clean_rx * clean_rx - ec->clean_rx_power) >> 6);
|
|
|
|
score = 0;
|
|
/* If there is very little being transmitted, any attempt to train is
|
|
futile. We would either be training on the far end's noise or signal,
|
|
the channel's own noise, or our noise. Either way, this is hardly good
|
|
training, so don't do it (avoid trouble). */
|
|
if (ec->tx_power[0] > MIN_TX_POWER_FOR_ADAPTION) {
|
|
/* If the received power is very low, either we are sending very little or
|
|
we are already well adapted. There is little point in trying to improve
|
|
the adaption under these circumstances, so don't do it (reduce the
|
|
compute load). */
|
|
if (ec->tx_power[1] > ec->rx_power[0]) {
|
|
/* There is no (or little) far-end speech. */
|
|
if (ec->nonupdate_dwell == 0) {
|
|
if (++ec->narrowband_count >= 160) {
|
|
ec->narrowband_count = 0;
|
|
score = narrowband_detect(ec);
|
|
//printf("Do the narrowband test %d at %d\n", score, ec->curr_pos);
|
|
if (score > 6) {
|
|
if (ec->narrowband_score == 0)
|
|
memcpy(ec->fir_taps16[3], ec->fir_taps16[(ec->tap_set + 1) % 3],
|
|
ec->taps * sizeof(int16_t));
|
|
ec->narrowband_score += score;
|
|
} else {
|
|
if (ec->narrowband_score > 200) {
|
|
//printf("Revert to %d at %d\n", (ec->tap_set + 1)%3, sample_no);
|
|
memcpy(ec->fir_taps16[ec->tap_set], ec->fir_taps16[3],
|
|
ec->taps * sizeof(int16_t));
|
|
memcpy(ec->fir_taps16[(ec->tap_set - 1) % 3], ec->fir_taps16[3],
|
|
ec->taps * sizeof(int16_t));
|
|
for (i = 0; i < ec->taps; i++)
|
|
ec->fir_taps32[i] = ec->fir_taps16[3][i] << 15;
|
|
ec->tap_rotate_counter = 1600;
|
|
}
|
|
ec->narrowband_score = 0;
|
|
}
|
|
}
|
|
ec->dtd_onset = FALSE;
|
|
if (--ec->tap_rotate_counter <= 0) {
|
|
//printf("Rotate to %d at %d\n", ec->tap_set, sample_no);
|
|
ec->tap_rotate_counter = 1600;
|
|
ec->tap_set++;
|
|
if (ec->tap_set > 2)
|
|
ec->tap_set = 0;
|
|
ec->fir_state.coeffs = ec->fir_taps16[ec->tap_set];
|
|
}
|
|
/* ... and we are not in the dwell time from previous speech. */
|
|
if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) && ec->narrowband_score == 0) {
|
|
//nsuppr = saturate((clean_rx << 16)/ec->tx_power[1]);
|
|
//nsuppr = clean_rx/ec->tx_power[1];
|
|
/* If a sudden surge in signal level (e.g. the onset of a tone
|
|
burst) cause an abnormally high instantaneous to average
|
|
signal power ratio, we could kick the adaption badly in the
|
|
wrong direction. This is because the tx_power takes too long
|
|
to react and rise. We need to stop too rapid adaption to the
|
|
new signal. We normalise to a value derived from the
|
|
instantaneous signal if it exceeds the peak by too much. */
|
|
nsuppr = clean_rx;
|
|
/* Divide isn't very quick, but the "where is the top bit" and shift
|
|
instructions are single cycle. */
|
|
if (tx > 4 * ec->tx_power[3])
|
|
i = top_bit(tx) - 8;
|
|
else
|
|
i = top_bit(ec->tx_power[3]) - 8;
|
|
if (i > 0)
|
|
nsuppr >>= i;
|
|
lms_adapt(ec, nsuppr);
|
|
}
|
|
}
|
|
//printf("%10d %10d %10d %10d %10d\n", rx, clean_rx, nsuppr, ec->tx_power[1], ec->rx_power[1]);
|
|
//printf("%.4f\n", (float) ec->rx_power[1]/(float) ec->clean_rx_power);
|
|
} else {
|
|
if (!ec->dtd_onset) {
|
|
//printf("Revert to %d at %d\n", (ec->tap_set + 1)%3, sample_no);
|
|
memcpy(ec->fir_taps16[ec->tap_set], ec->fir_taps16[(ec->tap_set + 1) % 3],
|
|
ec->taps * sizeof(int16_t));
|
|
memcpy(ec->fir_taps16[(ec->tap_set - 1) % 3],
|
|
ec->fir_taps16[(ec->tap_set + 1) % 3], ec->taps * sizeof(int16_t));
|
|
for (i = 0; i < ec->taps; i++)
|
|
ec->fir_taps32[i] = ec->fir_taps16[(ec->tap_set + 1) % 3][i] << 15;
|
|
ec->tap_rotate_counter = 1600;
|
|
ec->dtd_onset = TRUE;
|
|
}
|
|
ec->nonupdate_dwell = NONUPDATE_DWELL_TIME;
|
|
}
|
|
}
|
|
|
|
if (ec->rx_power[1])
|
|
ec->vad = (8000 * ec->clean_rx_power) / ec->rx_power[1];
|
|
else
|
|
ec->vad = 0;
|
|
if (ec->rx_power[1] > 2048 * 2048 && ec->clean_rx_power > 4 * ec->rx_power[1]) {
|
|
/* The EC seems to be making things worse, instead of better. Zap it! */
|
|
memset(ec->fir_taps32, 0, ec->taps * sizeof(int32_t));
|
|
for (i = 0; i < 4; i++)
|
|
memset(ec->fir_taps16[i], 0, ec->taps * sizeof(int16_t));
|
|
}
|
|
#if defined(XYZZY)
|
|
if ((ec->adaption_mode & ECHO_CAN_USE_SUPPRESSOR)) {
|
|
ec->supp_test1 +=
|
|
(ec->fir_state.history[ec->curr_pos] -
|
|
ec->fir_state.history[(ec->curr_pos - 7) & ec->tap_mask]);
|
|
ec->supp_test2 +=
|
|
(ec->fir_state.history[(ec->curr_pos - 24) & ec->tap_mask] -
|
|
ec->fir_state.history[(ec->curr_pos - 31) & ec->tap_mask]);
|
|
if (ec->supp_test1 > 42 && ec->supp_test2 > 42)
|
|
supp_change = 25;
|
|
else
|
|
supp_change = 50;
|
|
supp = supp_change + k1 * ec->supp1 + k2 * ec->supp2;
|
|
ec->supp2 = ec->supp1;
|
|
ec->supp1 = supp;
|
|
clean_rx *= (1 - supp);
|
|
}
|
|
#endif
|
|
|
|
if ((ec->adaption_mode & ECHO_CAN_USE_NLP)) {
|
|
/* Non-linear processor - a fancy way to say "zap small signals, to avoid
|
|
residual echo due to (uLaw/ALaw) non-linearity in the channel.". */
|
|
if (ec->rx_power[1] < 30000000) {
|
|
if (!ec->cng) {
|
|
ec->cng_level = ec->clean_rx_power;
|
|
ec->cng = TRUE;
|
|
}
|
|
if ((ec->adaption_mode & ECHO_CAN_USE_CNG)) {
|
|
/* Very elementary comfort noise generation */
|
|
/* Just random numbers rolled off very vaguely Hoth-like */
|
|
ec->cng_rndnum = 1664525U * ec->cng_rndnum + 1013904223U;
|
|
ec->cng_filter = ((ec->cng_rndnum & 0xFFFF) - 32768 + 5 * ec->cng_filter) >> 3;
|
|
clean_rx = (ec->cng_filter * ec->cng_level) >> 17;
|
|
/* TODO: A better CNG, with more accurate (tracking) spectral shaping! */
|
|
} else {
|
|
clean_rx = 0;
|
|
}
|
|
//clean_rx = -16000;
|
|
} else {
|
|
ec->cng = FALSE;
|
|
}
|
|
} else {
|
|
ec->cng = FALSE;
|
|
}
|
|
|
|
//printf("Narrowband score %4d %5d at %d\n", ec->narrowband_score, score, sample_no);
|
|
/* Roll around the rolling buffer */
|
|
if (ec->curr_pos <= 0)
|
|
ec->curr_pos = ec->taps;
|
|
ec->curr_pos--;
|
|
return (int16_t) clean_rx;
|
|
}
|
|
|
|
#endif //NOT_NEEDED
|
|
/*- End of function --------------------------------------------------------*/
|
|
/*- End of file ------------------------------------------------------------*/
|
|
#endif
|
|
|
|
#include <inttypes.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#include <time.h>
|
|
#include <fcntl.h>
|
|
#include <math.h>
|
|
|
|
//#include "spandsp/telephony.h"
|
|
//#include "spandsp/tone_detect.h"
|
|
//#include "spandsp/tone_generate.h"
|
|
//#include "spandsp/super_tone_rx.h"
|
|
//#include "giova.h"
|
|
|
|
#if !defined(M_PI)
|
|
/* C99 systems may not define M_PI */
|
|
#define M_PI 3.14159265358979323846264338327
|
|
#endif
|
|
|
|
//#define USE_3DNOW
|
|
|
|
#define DEFAULT_DTMF_TX_LEVEL -10
|
|
#define DEFAULT_DTMF_TX_ON_TIME 50
|
|
#define DEFAULT_DTMF_TX_OFF_TIME 55
|
|
|
|
#define DTMF_THRESHOLD 8.0e7f
|
|
#define DTMF_NORMAL_TWIST 6.3f /* 8dB */
|
|
#define DTMF_REVERSE_TWIST 2.5f /* 4dB */
|
|
#define DTMF_RELATIVE_PEAK_ROW 6.3f /* 8dB */
|
|
#define DTMF_RELATIVE_PEAK_COL 6.3f /* 8dB */
|
|
#define DTMF_TO_TOTAL_ENERGY 42.0f
|
|
|
|
static const float dtmf_row[] = {
|
|
697.0f, 770.0f, 852.0f, 941.0f
|
|
};
|
|
static const float dtmf_col[] = {
|
|
1209.0f, 1336.0f, 1477.0f, 1633.0f
|
|
};
|
|
|
|
static const char dtmf_positions[] = "123A" "456B" "789C" "*0#D";
|
|
|
|
static goertzel_descriptor_t dtmf_detect_row[4];
|
|
static goertzel_descriptor_t dtmf_detect_col[4];
|
|
|
|
//
|
|
//static int dtmf_tx_inited = 0;
|
|
//static tone_gen_descriptor_t dtmf_digit_tones[16];
|
|
|
|
#if defined(USE_3DNOW)
|
|
static __inline__ void _dtmf_goertzel_update(goertzel_state_t * s, float x[], int samples)
|
|
{
|
|
int n;
|
|
float v;
|
|
int i;
|
|
float vv[16];
|
|
|
|
vv[4] = s[0].v2;
|
|
vv[5] = s[1].v2;
|
|
vv[6] = s[2].v2;
|
|
vv[7] = s[3].v2;
|
|
vv[8] = s[0].v3;
|
|
vv[9] = s[1].v3;
|
|
vv[10] = s[2].v3;
|
|
vv[11] = s[3].v3;
|
|
vv[12] = s[0].fac;
|
|
vv[13] = s[1].fac;
|
|
vv[14] = s[2].fac;
|
|
vv[15] = s[3].fac;
|
|
|
|
//v1 = s->v2;
|
|
//s->v2 = s->v3;
|
|
//s->v3 = s->fac*s->v2 - v1 + x[0];
|
|
|
|
__asm__ __volatile__(" femms;\n" " movq 16(%%edx),%%mm2;\n"
|
|
" movq 24(%%edx),%%mm3;\n" " movq 32(%%edx),%%mm4;\n"
|
|
" movq 40(%%edx),%%mm5;\n" " movq 48(%%edx),%%mm6;\n"
|
|
" movq 56(%%edx),%%mm7;\n" " jmp 1f;\n"
|
|
" .align 32;\n" " 1: ;\n" " prefetch (%%eax);\n"
|
|
" movq %%mm3,%%mm1;\n" " movq %%mm2,%%mm0;\n"
|
|
" movq %%mm5,%%mm3;\n" " movq %%mm4,%%mm2;\n"
|
|
" pfmul %%mm7,%%mm5;\n" " pfmul %%mm6,%%mm4;\n"
|
|
" pfsub %%mm1,%%mm5;\n" " pfsub %%mm0,%%mm4;\n"
|
|
" movq (%%eax),%%mm0;\n" " movq %%mm0,%%mm1;\n"
|
|
" punpckldq %%mm0,%%mm1;\n" " add $4,%%eax;\n"
|
|
" pfadd %%mm1,%%mm5;\n" " pfadd %%mm1,%%mm4;\n"
|
|
" dec %%ecx;\n" " jnz 1b;\n"
|
|
" movq %%mm2,16(%%edx);\n" " movq %%mm3,24(%%edx);\n"
|
|
" movq %%mm4,32(%%edx);\n" " movq %%mm5,40(%%edx);\n"
|
|
" femms;\n"::"c"(samples), "a"(x), "d"(vv)
|
|
:"memory", "eax", "ecx");
|
|
|
|
s[0].v2 = vv[4];
|
|
s[1].v2 = vv[5];
|
|
s[2].v2 = vv[6];
|
|
s[3].v2 = vv[7];
|
|
s[0].v3 = vv[8];
|
|
s[1].v3 = vv[9];
|
|
s[2].v3 = vv[10];
|
|
s[3].v3 = vv[11];
|
|
}
|
|
|
|
/*- End of function --------------------------------------------------------*/
|
|
#endif
|
|
|
|
int dtmf_rx(dtmf_rx_state_t * s, const int16_t amp[], int samples)
|
|
{
|
|
float row_energy[4];
|
|
float col_energy[4];
|
|
float famp;
|
|
float v1;
|
|
int i;
|
|
int j;
|
|
int sample;
|
|
int best_row;
|
|
int best_col;
|
|
int limit;
|
|
uint8_t hit;
|
|
|
|
hit = 0;
|
|
for (sample = 0; sample < samples; sample = limit) {
|
|
/* The block length is optimised to meet the DTMF specs. */
|
|
if ((samples - sample) >= (102 - s->current_sample))
|
|
limit = sample + (102 - s->current_sample);
|
|
else
|
|
limit = samples;
|
|
#if defined(USE_3DNOW)
|
|
_dtmf_goertzel_update(s->row_out, amp + sample, limit - sample);
|
|
_dtmf_goertzel_update(s->col_out, amp + sample, limit - sample);
|
|
#else
|
|
/* The following unrolled loop takes only 35% (rough estimate) of the
|
|
time of a rolled loop on the machine on which it was developed */
|
|
for (j = sample; j < limit; j++) {
|
|
famp = amp[j];
|
|
if (s->filter_dialtone) {
|
|
/* Sharp notches applied at 350Hz and 440Hz - the two common dialtone frequencies.
|
|
These are rather high Q, to achieve the required narrowness, without using lots of
|
|
sections. */
|
|
v1 = 0.98356f * famp + 1.8954426f * s->z350_1 - 0.9691396f * s->z350_2;
|
|
famp = v1 - 1.9251480f * s->z350_1 + s->z350_2;
|
|
s->z350_2 = s->z350_1;
|
|
s->z350_1 = v1;
|
|
|
|
v1 = 0.98456f * famp + 1.8529543f * s->z440_1 - 0.9691396f * s->z440_2;
|
|
famp = v1 - 1.8819938f * s->z440_1 + s->z440_2;
|
|
s->z440_2 = s->z440_1;
|
|
s->z440_1 = v1;
|
|
}
|
|
s->energy += famp * famp;
|
|
/* With GCC 2.95, the following unrolled code seems to take about 35%
|
|
(rough estimate) as long as a neat little 0-3 loop */
|
|
v1 = s->row_out[0].v2;
|
|
s->row_out[0].v2 = s->row_out[0].v3;
|
|
s->row_out[0].v3 = s->row_out[0].fac * s->row_out[0].v2 - v1 + famp;
|
|
|
|
v1 = s->col_out[0].v2;
|
|
s->col_out[0].v2 = s->col_out[0].v3;
|
|
s->col_out[0].v3 = s->col_out[0].fac * s->col_out[0].v2 - v1 + famp;
|
|
|
|
v1 = s->row_out[1].v2;
|
|
s->row_out[1].v2 = s->row_out[1].v3;
|
|
s->row_out[1].v3 = s->row_out[1].fac * s->row_out[1].v2 - v1 + famp;
|
|
|
|
v1 = s->col_out[1].v2;
|
|
s->col_out[1].v2 = s->col_out[1].v3;
|
|
s->col_out[1].v3 = s->col_out[1].fac * s->col_out[1].v2 - v1 + famp;
|
|
|
|
v1 = s->row_out[2].v2;
|
|
s->row_out[2].v2 = s->row_out[2].v3;
|
|
s->row_out[2].v3 = s->row_out[2].fac * s->row_out[2].v2 - v1 + famp;
|
|
|
|
v1 = s->col_out[2].v2;
|
|
s->col_out[2].v2 = s->col_out[2].v3;
|
|
s->col_out[2].v3 = s->col_out[2].fac * s->col_out[2].v2 - v1 + famp;
|
|
|
|
v1 = s->row_out[3].v2;
|
|
s->row_out[3].v2 = s->row_out[3].v3;
|
|
s->row_out[3].v3 = s->row_out[3].fac * s->row_out[3].v2 - v1 + famp;
|
|
|
|
v1 = s->col_out[3].v2;
|
|
s->col_out[3].v2 = s->col_out[3].v3;
|
|
s->col_out[3].v3 = s->col_out[3].fac * s->col_out[3].v2 - v1 + famp;
|
|
}
|
|
#endif
|
|
s->current_sample += (limit - sample);
|
|
if (s->current_sample < 102)
|
|
continue;
|
|
|
|
/* We are at the end of a DTMF detection block */
|
|
/* Find the peak row and the peak column */
|
|
row_energy[0] = goertzel_result(&s->row_out[0]);
|
|
best_row = 0;
|
|
col_energy[0] = goertzel_result(&s->col_out[0]);
|
|
best_col = 0;
|
|
|
|
for (i = 1; i < 4; i++) {
|
|
row_energy[i] = goertzel_result(&s->row_out[i]);
|
|
if (row_energy[i] > row_energy[best_row])
|
|
best_row = i;
|
|
col_energy[i] = goertzel_result(&s->col_out[i]);
|
|
if (col_energy[i] > col_energy[best_col])
|
|
best_col = i;
|
|
}
|
|
hit = 0;
|
|
/* Basic signal level test and the twist test */
|
|
if (row_energy[best_row] >= DTMF_THRESHOLD && col_energy[best_col] >= DTMF_THRESHOLD
|
|
&& col_energy[best_col] < row_energy[best_row] * s->reverse_twist
|
|
&& col_energy[best_col] * s->normal_twist > row_energy[best_row]) {
|
|
/* Relative peak test ... */
|
|
for (i = 0; i < 4; i++) {
|
|
if ((i != best_col
|
|
&& col_energy[i] * DTMF_RELATIVE_PEAK_COL > col_energy[best_col])
|
|
|| (i != best_row
|
|
&& row_energy[i] * DTMF_RELATIVE_PEAK_ROW > row_energy[best_row])) {
|
|
break;
|
|
}
|
|
}
|
|
/* ... and fraction of total energy test */
|
|
if (i >= 4
|
|
&& (row_energy[best_row] + col_energy[best_col]) >
|
|
DTMF_TO_TOTAL_ENERGY * s->energy) {
|
|
hit = dtmf_positions[(best_row << 2) + best_col];
|
|
}
|
|
}
|
|
/* The logic in the next test should ensure the following for different successive hit patterns:
|
|
-----ABB = start of digit B.
|
|
----B-BB = start of digit B
|
|
----A-BB = start of digit B
|
|
BBBBBABB = still in digit B.
|
|
BBBBBB-- = end of digit B
|
|
BBBBBBC- = end of digit B
|
|
BBBBACBB = B ends, then B starts again.
|
|
BBBBBBCC = B ends, then C starts.
|
|
BBBBBCDD = B ends, then D starts.
|
|
This can work with:
|
|
- Back to back differing digits. Back-to-back digits should
|
|
not happen. The spec. says there should be a gap between digits.
|
|
However, many real phones do not impose a gap, and rolling across
|
|
the keypad can produce little or no gap.
|
|
- It tolerates nasty phones that give a very wobbly start to a digit.
|
|
- VoIP can give sample slips. The phase jumps that produces will cause
|
|
the block it is in to give no detection. This logic will ride over a
|
|
single missed block, and not falsely declare a second digit. If the
|
|
hiccup happens in the wrong place on a minimum length digit, however
|
|
we would still fail to detect that digit. Could anything be done to
|
|
deal with that? Packet loss is clearly a no-go zone.
|
|
Note this is only relevant to VoIP using A-law, u-law or similar.
|
|
Low bit rate codecs scramble DTMF too much for it to be recognised,
|
|
and often slip in units larger than a sample. */
|
|
if (hit != s->in_digit) {
|
|
if (s->last_hit != s->in_digit) {
|
|
/* We have two successive indications that something has changed. */
|
|
/* To declare digit on, the hits must agree. Otherwise we declare tone off. */
|
|
hit = (hit && hit == s->last_hit) ? hit : 0;
|
|
#if 0
|
|
if (s->realtime_callback) {
|
|
/* Avoid reporting multiple no digit conditions on flaky hits */
|
|
if (s->in_digit || hit) {
|
|
i = (s->in_digit
|
|
&& !hit) ? -99 : rint(log10f(s->energy) * 10.0f - 20.08f - 90.30F +
|
|
DBM0_MAX_POWER);
|
|
s->realtime_callback(s->realtime_callback_data, hit, i);
|
|
}
|
|
} else {
|
|
#endif
|
|
if (hit) {
|
|
if (s->current_digits < MAX_DTMF_DIGITS) {
|
|
s->digits[s->current_digits++] = (char) hit;
|
|
s->digits[s->current_digits] = '\0';
|
|
if (s->callback) {
|
|
s->callback(s->callback_data, s->digits, s->current_digits);
|
|
s->current_digits = 0;
|
|
}
|
|
} else {
|
|
s->lost_digits++;
|
|
}
|
|
}
|
|
#if 0
|
|
}
|
|
#endif
|
|
s->in_digit = hit;
|
|
}
|
|
}
|
|
s->last_hit = hit;
|
|
/* Reinitialise the detector for the next block */
|
|
for (i = 0; i < 4; i++) {
|
|
goertzel_reset(&s->row_out[i]);
|
|
goertzel_reset(&s->col_out[i]);
|
|
}
|
|
s->energy = 0.0f;
|
|
s->current_sample = 0;
|
|
}
|
|
if (s->current_digits && s->callback) {
|
|
s->callback(s->callback_data, s->digits, s->current_digits);
|
|
s->digits[0] = '\0';
|
|
s->current_digits = 0;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*- End of function --------------------------------------------------------*/
|
|
|
|
size_t dtmf_rx_get(dtmf_rx_state_t * s, char *buf, int max)
|
|
{
|
|
if (max > s->current_digits)
|
|
max = s->current_digits;
|
|
if (max > 0) {
|
|
memcpy(buf, s->digits, max);
|
|
memmove(s->digits, s->digits + max, s->current_digits - max);
|
|
s->current_digits -= max;
|
|
}
|
|
buf[max] = '\0';
|
|
return max;
|
|
}
|
|
|
|
/*- End of function --------------------------------------------------------*/
|
|
|
|
#if 0
|
|
void dtmf_rx_set_realtime_callback(dtmf_rx_state_t * s, tone_report_func_t callback,
|
|
void *user_data)
|
|
{
|
|
s->realtime_callback = callback;
|
|
s->realtime_callback_data = user_data;
|
|
}
|
|
#endif
|
|
/*- End of function --------------------------------------------------------*/
|
|
|
|
void dtmf_rx_parms(dtmf_rx_state_t * s, int filter_dialtone, int twist, int reverse_twist)
|
|
{
|
|
if (filter_dialtone >= 0) {
|
|
s->z350_1 = 0.0f;
|
|
s->z350_2 = 0.0f;
|
|
s->z440_1 = 0.0f;
|
|
s->z440_2 = 0.0f;
|
|
s->filter_dialtone = filter_dialtone;
|
|
}
|
|
if (twist >= 0)
|
|
s->normal_twist = powf(10.0f, twist / 10.0f);
|
|
if (reverse_twist >= 0)
|
|
s->reverse_twist = powf(10.0f, reverse_twist / 10.0f);
|
|
}
|
|
|
|
/*- End of function --------------------------------------------------------*/
|
|
|
|
dtmf_rx_state_t *dtmf_rx_init(dtmf_rx_state_t * s, dtmf_rx_callback_t callback,
|
|
void *user_data)
|
|
{
|
|
int i;
|
|
static int initialised = 0;
|
|
|
|
s->callback = callback;
|
|
s->callback_data = user_data;
|
|
s->realtime_callback = NULL;
|
|
s->realtime_callback_data = NULL;
|
|
s->filter_dialtone = 0;
|
|
s->normal_twist = DTMF_NORMAL_TWIST;
|
|
s->reverse_twist = DTMF_REVERSE_TWIST;
|
|
|
|
s->in_digit = 0;
|
|
s->last_hit = 0;
|
|
|
|
if (!initialised) {
|
|
for (i = 0; i < 4; i++) {
|
|
make_goertzel_descriptor(&dtmf_detect_row[i], dtmf_row[i], 102);
|
|
make_goertzel_descriptor(&dtmf_detect_col[i], dtmf_col[i], 102);
|
|
}
|
|
initialised = 1;
|
|
}
|
|
for (i = 0; i < 4; i++) {
|
|
goertzel_init(&s->row_out[i], &dtmf_detect_row[i]);
|
|
goertzel_init(&s->col_out[i], &dtmf_detect_col[i]);
|
|
}
|
|
s->energy = 0.0f;
|
|
s->current_sample = 0;
|
|
s->lost_digits = 0;
|
|
s->current_digits = 0;
|
|
s->digits[0] = '\0';
|
|
return s;
|
|
}
|
|
|
|
/*- End of function --------------------------------------------------------*/
|
|
|
|
#if 0
|
|
static void dtmf_tx_initialise(void)
|
|
{
|
|
int row;
|
|
int col;
|
|
|
|
if (dtmf_tx_inited)
|
|
return;
|
|
for (row = 0; row < 4; row++) {
|
|
for (col = 0; col < 4; col++) {
|
|
make_tone_gen_descriptor(&dtmf_digit_tones[row * 4 + col], (int) dtmf_row[row],
|
|
DEFAULT_DTMF_TX_LEVEL, (int) dtmf_col[col],
|
|
DEFAULT_DTMF_TX_LEVEL, DEFAULT_DTMF_TX_ON_TIME,
|
|
DEFAULT_DTMF_TX_OFF_TIME, 0, 0, FALSE);
|
|
}
|
|
}
|
|
dtmf_tx_inited = TRUE;
|
|
}
|
|
|
|
/*- End of function --------------------------------------------------------*/
|
|
|
|
int dtmf_tx(dtmf_tx_state_t * s, int16_t amp[], int max_samples)
|
|
{
|
|
int len;
|
|
size_t dig;
|
|
char *cp;
|
|
|
|
len = 0;
|
|
if (s->tones.current_section >= 0) {
|
|
/* Deal with the fragment left over from last time */
|
|
len = tone_gen(&(s->tones), amp, max_samples);
|
|
}
|
|
dig = 0;
|
|
while (dig < s->current_digits && len < max_samples) {
|
|
/* Step to the next digit */
|
|
if ((cp = strchr(dtmf_positions, s->digits[dig++])) == NULL)
|
|
continue;
|
|
tone_gen_init(&(s->tones), &(s->tone_descriptors[cp - dtmf_positions]));
|
|
len += tone_gen(&(s->tones), amp + len, max_samples - len);
|
|
}
|
|
if (dig) {
|
|
/* Shift out the consumed digits */
|
|
s->current_digits -= dig;
|
|
memmove(s->digits, s->digits + dig, s->current_digits);
|
|
}
|
|
return len;
|
|
}
|
|
|
|
/*- End of function --------------------------------------------------------*/
|
|
|
|
size_t dtmf_tx_put(dtmf_tx_state_t * s, const char *digits)
|
|
{
|
|
size_t len;
|
|
|
|
/* This returns the number of characters that would not fit in the buffer.
|
|
The buffer will only be loaded if the whole string of digits will fit,
|
|
in which case zero is returned. */
|
|
if ((len = strlen(digits)) > 0) {
|
|
if (s->current_digits + len <= MAX_DTMF_DIGITS) {
|
|
memcpy(s->digits + s->current_digits, digits, len);
|
|
s->current_digits += len;
|
|
len = 0;
|
|
} else {
|
|
len = MAX_DTMF_DIGITS - s->current_digits;
|
|
}
|
|
}
|
|
return len;
|
|
}
|
|
|
|
/*- End of function --------------------------------------------------------*/
|
|
|
|
dtmf_tx_state_t *dtmf_tx_init(dtmf_tx_state_t * s)
|
|
{
|
|
if (!dtmf_tx_inited)
|
|
dtmf_tx_initialise();
|
|
s->tone_descriptors = dtmf_digit_tones;
|
|
tone_gen_init(&(s->tones), &dtmf_digit_tones[0]);
|
|
s->current_sample = 0;
|
|
s->current_digits = 0;
|
|
s->tones.current_section = -1;
|
|
return s;
|
|
}
|
|
#endif //NO TX
|
|
/*- End of function --------------------------------------------------------*/
|
|
/*- End of file ------------------------------------------------------------*/
|
|
|
|
void make_goertzel_descriptor(goertzel_descriptor_t * t, float freq, int samples)
|
|
{
|
|
//t->fac = 2.0f*cosf(2.0f*M_PI*(freq/(float) SAMPLE_RATE));
|
|
t->fac = 2.0f * cosf(2.0f * M_PI * (freq / (float) 8000));
|
|
t->samples = samples;
|
|
}
|
|
|
|
/*- End of function --------------------------------------------------------*/
|
|
|
|
goertzel_state_t *goertzel_init(goertzel_state_t * s, goertzel_descriptor_t * t)
|
|
{
|
|
if (s || (s = malloc(sizeof(goertzel_state_t)))) {
|
|
s->v2 = s->v3 = 0.0;
|
|
s->fac = t->fac;
|
|
s->samples = t->samples;
|
|
s->current_sample = 0;
|
|
}
|
|
return s;
|
|
}
|
|
|
|
/*- End of function --------------------------------------------------------*/
|
|
|
|
void goertzel_reset(goertzel_state_t * s)
|
|
{
|
|
s->v2 = s->v3 = 0.0;
|
|
s->current_sample = 0;
|
|
}
|
|
|
|
/*- End of function --------------------------------------------------------*/
|
|
|
|
int goertzel_update(goertzel_state_t * s, const int16_t amp[], int samples)
|
|
{
|
|
int i;
|
|
float v1;
|
|
|
|
if (samples > s->samples - s->current_sample)
|
|
samples = s->samples - s->current_sample;
|
|
for (i = 0; i < samples; i++) {
|
|
v1 = s->v2;
|
|
s->v2 = s->v3;
|
|
s->v3 = s->fac * s->v2 - v1 + amp[i];
|
|
}
|
|
s->current_sample += samples;
|
|
return samples;
|
|
}
|
|
|
|
/*- End of function --------------------------------------------------------*/
|
|
|
|
float goertzel_result(goertzel_state_t * s)
|
|
{
|
|
float v1;
|
|
|
|
/* Push a zero through the process to finish things off. */
|
|
v1 = s->v2;
|
|
s->v2 = s->v3;
|
|
s->v3 = s->fac * s->v2 - v1;
|
|
/* Now calculate the non-recursive side of the filter. */
|
|
/* The result here is not scaled down to allow for the magnification
|
|
effect of the filter (the usual DFT magnification effect). */
|
|
return s->v3 * s->v3 + s->v2 * s->v2 - s->v2 * s->v3 * s->fac;
|
|
}
|
|
|
|
/*- End of function --------------------------------------------------------*/
|
|
/*- End of file ------------------------------------------------------------*/
|