Added libecho, the line-echo-canceler from spandsp
parent
d8a9f29153
commit
35ceaafd85
|
@ -7,7 +7,6 @@ Makefile.in
|
|||
*.la
|
||||
*.pc
|
||||
aclocal.m4
|
||||
m4/*.m4
|
||||
autom4te.cache
|
||||
config.h*
|
||||
config.sub
|
||||
|
@ -41,4 +40,6 @@ tests/testsuite.log
|
|||
# vi/vim files
|
||||
*.sw?
|
||||
|
||||
src/libecho/libecho.a
|
||||
src/osmo-v5-le
|
||||
tests/answer_detect
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
ACLOCAL_AMFLAGS = -I m4
|
||||
|
||||
AM_CPPFLAGS = $(all_includes)
|
||||
SUBDIRS = src
|
||||
SUBDIRS = src tests
|
||||
|
||||
dist-hook:
|
||||
echo $(VERSION) > $(distdir)/.tarball-version
|
||||
|
|
|
@ -102,9 +102,13 @@ then
|
|||
CPPFLAGS="$CPPFLAGS $WERROR_FLAGS"
|
||||
fi
|
||||
|
||||
AX_CHECK_X86_FEATURES()
|
||||
|
||||
AC_MSG_RESULT([CFLAGS="$CFLAGS"])
|
||||
AC_MSG_RESULT([CPPFLAGS="$CPPFLAGS"])
|
||||
|
||||
AC_OUTPUT(
|
||||
src/libecho/Makefile
|
||||
src/Makefile
|
||||
tests/Makefile
|
||||
Makefile)
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
AC_DEFUN([AX_CHECK_X86_FEATURES],
|
||||
[m4_foreach_w(
|
||||
[ax_x86_feature],
|
||||
[mmx popcnt sse sse2 sse3 sse4.1 sse4.2 sse4a avx avx2 avx512f fma fma4 bmi bmi2],
|
||||
[AX_GCC_X86_CPU_SUPPORTS(ax_x86_feature,
|
||||
[X86_FEATURE_CFLAGS="$X86_FEATURE_CFLAGS -m[]ax_x86_feature"],
|
||||
[])
|
||||
])
|
||||
AC_SUBST([X86_FEATURE_CFLAGS])
|
||||
m4_ifval([$1],[$1],
|
||||
[CFLAGS="$CFLAGS $X86_FEATURE_CFLAGS"])
|
||||
$2
|
||||
])
|
|
@ -0,0 +1,44 @@
|
|||
AC_DEFUN_ONCE([_AX_GCC_X86_CPU_INIT],
|
||||
[AC_LANG_PUSH([C])
|
||||
AC_CACHE_CHECK([for gcc __builtin_cpu_init function],
|
||||
[ax_cv_gcc_check_x86_cpu_init],
|
||||
[AC_RUN_IFELSE(
|
||||
[AC_LANG_PROGRAM([#include <stdlib.h>],
|
||||
[__builtin_cpu_init ();])
|
||||
],
|
||||
[ax_cv_gcc_check_x86_cpu_init=yes],
|
||||
[ax_cv_gcc_check_x86_cpu_init=no])])
|
||||
AC_LANG_POP([C])
|
||||
AS_IF([test "X$ax_cv_gcc_check_x86_cpu_init" = "Xno"],
|
||||
[AC_MSG_ERROR([Need GCC to support X86 CPU features tests])])
|
||||
])
|
||||
|
||||
AC_DEFUN([AX_GCC_X86_CPU_SUPPORTS],
|
||||
[AC_REQUIRE([AC_PROG_CC])
|
||||
AC_REQUIRE([_AX_GCC_X86_CPU_INIT])
|
||||
AC_LANG_PUSH([C])
|
||||
AS_VAR_PUSHDEF([gcc_x86_feature], [AS_TR_SH([ax_cv_gcc_x86_cpu_supports_$1])])
|
||||
AC_CACHE_CHECK([for x86 $1 instruction support],
|
||||
[gcc_x86_feature],
|
||||
[AC_RUN_IFELSE(
|
||||
[AC_LANG_PROGRAM( [#include <stdlib.h> ],
|
||||
[ __builtin_cpu_init ();
|
||||
if (__builtin_cpu_supports("$1"))
|
||||
return 0;
|
||||
return 1;
|
||||
])],
|
||||
[gcc_x86_feature=yes],
|
||||
[gcc_x86_feature=no]
|
||||
)]
|
||||
)
|
||||
AC_LANG_POP([C])
|
||||
AS_VAR_IF([gcc_x86_feature],[yes],
|
||||
[AC_DEFINE(
|
||||
AS_TR_CPP([HAVE_$1_INSTRUCTIONS]),
|
||||
[1],
|
||||
[Define if $1 instructions are supported])
|
||||
$2],
|
||||
[$3]
|
||||
)
|
||||
AS_VAR_POPDEF([gcc_x86_feature])
|
||||
])
|
|
@ -3,6 +3,8 @@ AM_CFLAGS= -Wall -Wextra -g $(LIBOSMOCORE_CFLAGS) $(LIBOSMOGSM_CFLAGS) $(LIBOSMO
|
|||
AM_LDFLAGS = $(COVERAGE_LDFLAGS)
|
||||
COMMONLIBS = $(LIBOSMOCORE_LIBS) $(LIBOSMOGSM_LIBS) $(LIBOSMOVTY_LIBS) $(LIBOSMOABIS_LIBS) $(LIBOSMOCTRL_LIBS)
|
||||
|
||||
SUBDIRS = libecho
|
||||
|
||||
bin_PROGRAMS = osmo-v5-le
|
||||
|
||||
osmo_v5_le_SOURCES = logging.c \
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
AM_CPPFLAGS = $(all_includes)
|
||||
AM_CFLAGS= -Wall -Wextra -g
|
||||
|
||||
noinst_LIBRARIES = libecho.a
|
||||
|
||||
libecho_a_SOURCES = echo.c \
|
||||
answertone.c
|
|
@ -0,0 +1,267 @@
|
|||
/* An answer tone detection algrotihm using Goertzel to detect level and phase
|
||||
*
|
||||
* The answer tone is described in ITU V.8 and is also used to disable echo
|
||||
* suppression and cancelation in the network. The detection uses phase to
|
||||
* detect frequencies that are too far off, as well a phase reversal, to
|
||||
* distinguish the answer tone from other continuous tones.
|
||||
*
|
||||
* (C) 2023 by Andreas Eversberg <andreas@eversberg.eu>
|
||||
* All Rights Reserved
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include <errno.h>
|
||||
#include "answertone.h"
|
||||
|
||||
#define ANS_FREQUENCY 2100.0
|
||||
#define WINDOW_DURATION 0.010 /* 10 MS */
|
||||
#define DETECT_DURATION 0.400 /* < 425 MS */
|
||||
#define MIN_LEVEL 0.05 /* -26 dBm0 */
|
||||
#define MAX_NOISE 0.5 /* -6 dB */
|
||||
#define MAX_FREQUENCY 22 /* frequency error */
|
||||
#define MIN_REVERSAL 2.967 /* about 10 degrees off 180 (PI) */
|
||||
|
||||
// #define DEBUG
|
||||
// #define HEAVY_DEBUG
|
||||
|
||||
void goertzel_init(struct answer_tone *at, float frequency, float samplerate)
|
||||
{
|
||||
float omega;
|
||||
|
||||
omega = 2.0 * M_PI * frequency / samplerate;
|
||||
at->sine = sin(omega);
|
||||
at->cosine = cos(omega);
|
||||
at->coeff = 2.0 * at->cosine;
|
||||
}
|
||||
|
||||
/* with rectangular window, the frequency response is:
|
||||
* if it matches the filter frequency: 0 dB
|
||||
* if it is 0.5/duration off the filter frequency: about -4 dB
|
||||
* if it is 1/duration off the filter frequency; -INF dB
|
||||
* if it is 1.5/duration off the filter frequency: about -13.5 dB
|
||||
*/
|
||||
void goertzel_calculate(struct answer_tone *at, float *data, int length, float *level_p, float *magnitude_p, float *phase_p)
|
||||
{
|
||||
float q0 = 0, q1 = 0, q2 = 0;
|
||||
float real, imag;
|
||||
float scaling = (float)length / 2.0;
|
||||
float upper = 0, lower = 0;
|
||||
int i;
|
||||
|
||||
upper = lower = *data;
|
||||
|
||||
for (i = 0; i < length; i++) {
|
||||
if (*data > upper)
|
||||
upper = *data;
|
||||
else if (*data < lower)
|
||||
lower = *data;
|
||||
q0 = at->coeff * q1 - q2 + *data++;
|
||||
q2 = q1;
|
||||
q1 = q0;
|
||||
}
|
||||
|
||||
real = (q1 * at->cosine - q2) / scaling;
|
||||
imag = (q1 * at->sine) / scaling;
|
||||
|
||||
*level_p = (upper - lower) / 2.0;
|
||||
*magnitude_p = sqrtf(real*real + imag*imag);
|
||||
*phase_p = atan2(imag, real);
|
||||
}
|
||||
|
||||
int answertone_init(struct answer_tone *at, int samplerate)
|
||||
{
|
||||
memset(at, 0, sizeof(*at));
|
||||
|
||||
/* use 10 ms as window size */
|
||||
at->buffer_size = samplerate * WINDOW_DURATION;
|
||||
at->buffer = calloc(at->buffer_size, sizeof(*at->buffer));
|
||||
if (!at->buffer)
|
||||
return -ENOMEM;
|
||||
|
||||
goertzel_init(at, ANS_FREQUENCY, samplerate);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void answertone_reset(struct answer_tone *at)
|
||||
{
|
||||
at->state = AT_STATE_NONE;
|
||||
}
|
||||
|
||||
void answertone_exit(struct answer_tone *at)
|
||||
{
|
||||
free(at->buffer);
|
||||
}
|
||||
|
||||
enum at_fail answertone_check(struct answer_tone *at, float *phase_p, float *frequency_p)
|
||||
{
|
||||
float level, magnitude, last_phase, shift;
|
||||
|
||||
goertzel_calculate(at, at->buffer, at->buffer_size, &level, &magnitude, phase_p);
|
||||
last_phase = at->last_phase;
|
||||
at->last_phase = *phase_p;
|
||||
shift = (*phase_p - last_phase);
|
||||
if (shift > M_PI)
|
||||
shift -= M_PI * 2.0;
|
||||
else if (shift < -M_PI)
|
||||
shift += M_PI * 2.0;
|
||||
*frequency_p = shift / M_PI / 2.0 / WINDOW_DURATION;
|
||||
|
||||
#if HEAVY_DEBUG
|
||||
printf("level=%6.3f magnitude=%6.3f snr=%6.3f phase=%9.3f frequency=%8.3f\n",
|
||||
level, magnitude, magnitude/level, *phase_p / M_PI * 180, *frequency_p);
|
||||
#endif
|
||||
|
||||
/* fails due to low level */
|
||||
if (magnitude < MIN_LEVEL)
|
||||
return AT_FAIL_MIN_LEVEL;
|
||||
|
||||
/* fails due to bad SNR */
|
||||
if (1.0 - magnitude/level > MAX_NOISE)
|
||||
return AT_FAIL_MAX_NOISE;
|
||||
|
||||
/* fails due to wrong frequency */
|
||||
if (fabsf(*frequency_p) > MAX_FREQUENCY)
|
||||
return AT_FAIL_MAX_FREQUENCY;
|
||||
|
||||
return AT_FAIL_NONE;
|
||||
}
|
||||
|
||||
#if DEBUG
|
||||
static const char *answertone_cause[] = {
|
||||
"ok",
|
||||
"level too low",
|
||||
"too noisy",
|
||||
"wrong frequency",
|
||||
};
|
||||
#endif
|
||||
|
||||
static enum at_state answertone_chunk(struct answer_tone *at)
|
||||
{
|
||||
float phase, frequency, shift;
|
||||
enum at_fail fail;
|
||||
int rc = 0;
|
||||
|
||||
fail = answertone_check(at, &phase, &frequency);
|
||||
|
||||
switch (at->state) {
|
||||
case AT_STATE_NONE:
|
||||
if (fail == AT_FAIL_NONE) {
|
||||
#if DEBUG
|
||||
printf("DETECTED TONE\n");
|
||||
#endif
|
||||
at->state = AT_STATE_DETECT;
|
||||
at->tone_duration = WINDOW_DURATION;
|
||||
/* count frequency values */
|
||||
at->frequency_sum = 0.0;
|
||||
at->count = 0;
|
||||
}
|
||||
break;
|
||||
case AT_STATE_DETECT:
|
||||
if (fail != AT_FAIL_NONE) {
|
||||
#if DEBUG
|
||||
printf("LOST TONE AFTER %.3f SECONDS (because %s)\n", at->tone_duration, cause[fail]);
|
||||
#endif
|
||||
at->state = AT_STATE_NONE;
|
||||
break;
|
||||
}
|
||||
at->frequency_sum += frequency;
|
||||
at->count++;
|
||||
at->tone_duration += WINDOW_DURATION;
|
||||
at->last2_valid_phase = at->last1_valid_phase;
|
||||
at->last1_valid_phase = phase;
|
||||
if (at->tone_duration >= DETECT_DURATION) {
|
||||
#if DEBUG
|
||||
printf("LONG TONE VALID WITH FREQUENCY OFFSET %.3f\n", at->frequency_sum / at->count);
|
||||
#endif
|
||||
at->state = AT_STATE_TONE;
|
||||
}
|
||||
break;
|
||||
case AT_STATE_TONE:
|
||||
if (fail != AT_FAIL_NONE) {
|
||||
#if DEBUG
|
||||
printf("LONG TONE CHANGED, CHECK PHASE REVERSAL\n");
|
||||
#endif
|
||||
at->last_valid_frequency = at->frequency_sum / at->count;
|
||||
at->state = AT_STATE_PAUSE;
|
||||
/* count pause chunks */
|
||||
at->count = 1;
|
||||
break;
|
||||
}
|
||||
at->frequency_sum += frequency;
|
||||
at->count++;
|
||||
at->tone_duration += WINDOW_DURATION;
|
||||
at->last2_valid_phase = at->last1_valid_phase;
|
||||
at->last1_valid_phase = phase;
|
||||
break;
|
||||
case AT_STATE_PAUSE:
|
||||
/* three pause chunks: two may be invalid due to phase reversal, one extra to get valid frequency */
|
||||
if (at->count++ < 3)
|
||||
break;
|
||||
/* phase change over 5 chunks */
|
||||
shift = (phase - at->last2_valid_phase);
|
||||
if (shift > M_PI)
|
||||
shift -= M_PI * 2.0;
|
||||
else if (shift < -M_PI)
|
||||
shift += M_PI * 2.0;
|
||||
/* substract phase change to be expected over 5 chunk */
|
||||
shift -= at->last_valid_frequency * WINDOW_DURATION * 5.0 * M_PI * 2.0;
|
||||
if (shift > M_PI)
|
||||
shift -= M_PI * 2.0;
|
||||
else if (shift < -M_PI)
|
||||
shift += M_PI * 2.0;
|
||||
/* if there is change of less than about 10 degrees off 180 */
|
||||
if (fabsf(shift) > MIN_REVERSAL) {
|
||||
#if DEBUG
|
||||
printf("PHASE REVERSAL VALID (phase shift %.0f deg)\n", shift / M_PI * 180.0);
|
||||
#endif
|
||||
at->frequency_sum = 0.0;
|
||||
at->count = 0;
|
||||
at->state = AT_STATE_DETECT;
|
||||
rc = 1;
|
||||
} else {
|
||||
#if DEBUG
|
||||
printf("LOST TONE, NO VALID PHASE REVERSAL\n");
|
||||
#endif
|
||||
at->state = AT_STATE_NONE;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* convert stream of int16_t (ISDN) stream into chunks of float (dBm0); return 1, if there was a valid answer tone */
|
||||
int answertone_process(struct answer_tone *at, int16_t *data, int length)
|
||||
{
|
||||
int rc = 0;
|
||||
|
||||
while (length--) {
|
||||
/* convert ISDN data to float with dBm level (1 dBm = -3 dB below full int16_t scale) */
|
||||
at->buffer[at->buffer_pos++] = (float)(*data++) / 23170.0;
|
||||
if (at->buffer_pos == at->buffer_size) {
|
||||
rc |= answertone_chunk(at);
|
||||
at->buffer_pos = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
|
||||
enum at_fail {
|
||||
AT_FAIL_NONE = 0,
|
||||
AT_FAIL_MIN_LEVEL,
|
||||
AT_FAIL_MAX_NOISE,
|
||||
AT_FAIL_MAX_FREQUENCY,
|
||||
};
|
||||
|
||||
enum at_state {
|
||||
AT_STATE_NONE = 0,
|
||||
AT_STATE_DETECT,
|
||||
AT_STATE_TONE,
|
||||
AT_STATE_PAUSE,
|
||||
};
|
||||
|
||||
struct answer_tone {
|
||||
float sine;
|
||||
float cosine;
|
||||
float coeff;
|
||||
float *buffer;
|
||||
int buffer_size;
|
||||
int buffer_pos;
|
||||
float last_phase;
|
||||
enum at_state state;
|
||||
float tone_duration;
|
||||
float frequency_sum;
|
||||
int count;
|
||||
float last1_valid_phase;
|
||||
float last2_valid_phase;
|
||||
float last_valid_frequency;
|
||||
};
|
||||
|
||||
int answertone_init(struct answer_tone *at, int samplerate);
|
||||
void answertone_reset(struct answer_tone *at);
|
||||
void answertone_exit(struct answer_tone *at);
|
||||
enum at_fail answertone_check(struct answer_tone *at, float *phase_p, float *frequency_p);
|
||||
int answertone_process(struct answer_tone *at, int16_t *data, int length);
|
|
@ -0,0 +1,253 @@
|
|||
/*
|
||||
* SpanDSP - a series of DSP components for telephony
|
||||
*
|
||||
* bit_operations.h - Various bit level operations, such as bit reversal
|
||||
*
|
||||
* Written by Steve Underwood <steveu@coppice.org>
|
||||
*
|
||||
* Copyright (C) 2006 Steve Underwood
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*
|
||||
* $Id: bit_operations.h,v 1.11 2006/11/28 15:37:03 steveu Exp $
|
||||
*/
|
||||
|
||||
/*! \file */
|
||||
|
||||
#if !defined(_BIT_OPERATIONS_H_)
|
||||
#define _BIT_OPERATIONS_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
/*! \brief Find the bit position of the highest set bit in a word
|
||||
\param bits The word to be searched
|
||||
\return The bit number of the highest set bit, or -1 if the word is zero. */
|
||||
static __inline__ int top_bit(unsigned int bits)
|
||||
{
|
||||
int res;
|
||||
|
||||
__asm__ (" xorl %[res],%[res];\n"
|
||||
" decl %[res];\n"
|
||||
" bsrl %[bits],%[res]\n"
|
||||
: [res] "=&r" (res)
|
||||
: [bits] "rm" (bits));
|
||||
return res;
|
||||
}
|
||||
/*- End of function --------------------------------------------------------*/
|
||||
|
||||
/*! \brief Find the bit position of the lowest set bit in a word
|
||||
\param bits The word to be searched
|
||||
\return The bit number of the lowest set bit, or -1 if the word is zero. */
|
||||
static __inline__ int bottom_bit(unsigned int bits)
|
||||
{
|
||||
int res;
|
||||
|
||||
__asm__ (" xorl %[res],%[res];\n"
|
||||
" decl %[res];\n"
|
||||
" bsfl %[bits],%[res]\n"
|
||||
: [res] "=&r" (res)
|
||||
: [bits] "rm" (bits));
|
||||
return res;
|
||||
}
|
||||
/*- End of function --------------------------------------------------------*/
|
||||
#else
|
||||
static __inline__ int top_bit(unsigned int bits)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (bits == 0)
|
||||
return -1;
|
||||
i = 0;
|
||||
if (bits & 0xFFFF0000)
|
||||
{
|
||||
bits &= 0xFFFF0000;
|
||||
i += 16;
|
||||
}
|
||||
if (bits & 0xFF00FF00)
|
||||
{
|
||||
bits &= 0xFF00FF00;
|
||||
i += 8;
|
||||
}
|
||||
if (bits & 0xF0F0F0F0)
|
||||
{
|
||||
bits &= 0xF0F0F0F0;
|
||||
i += 4;
|
||||
}
|
||||
if (bits & 0xCCCCCCCC)
|
||||
{
|
||||
bits &= 0xCCCCCCCC;
|
||||
i += 2;
|
||||
}
|
||||
if (bits & 0xAAAAAAAA)
|
||||
{
|
||||
bits &= 0xAAAAAAAA;
|
||||
i += 1;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
/*- End of function --------------------------------------------------------*/
|
||||
|
||||
static __inline__ int bottom_bit(unsigned int bits)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (bits == 0)
|
||||
return -1;
|
||||
i = 32;
|
||||
if (bits & 0x0000FFFF)
|
||||
{
|
||||
bits &= 0x0000FFFF;
|
||||
i -= 16;
|
||||
}
|
||||
if (bits & 0x00FF00FF)
|
||||
{
|
||||
bits &= 0x00FF00FF;
|
||||
i -= 8;
|
||||
}
|
||||
if (bits & 0x0F0F0F0F)
|
||||
{
|
||||
bits &= 0x0F0F0F0F;
|
||||
i -= 4;
|
||||
}
|
||||
if (bits & 0x33333333)
|
||||
{
|
||||
bits &= 0x33333333;
|
||||
i -= 2;
|
||||
}
|
||||
if (bits & 0x55555555)
|
||||
{
|
||||
bits &= 0x55555555;
|
||||
i -= 1;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
/*- End of function --------------------------------------------------------*/
|
||||
#endif
|
||||
|
||||
/*! \brief Bit reverse a byte.
|
||||
\param data The byte to be reversed.
|
||||
\return The bit reversed version of data. */
|
||||
static __inline__ uint8_t bit_reverse8(uint8_t x)
|
||||
{
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
/* If multiply is fast */
|
||||
return ((x*0x0802U & 0x22110U) | (x*0x8020U & 0x88440U))*0x10101U >> 16;
|
||||
#else
|
||||
/* If multiply is slow, but we have a barrel shifter */
|
||||
x = (x >> 4) | (x << 4);
|
||||
x = ((x & 0xCC) >> 2) | ((x & 0x33) << 2);
|
||||
return ((x & 0xAA) >> 1) | ((x & 0x55) << 1);
|
||||
#endif
|
||||
}
|
||||
/*- End of function --------------------------------------------------------*/
|
||||
|
||||
/*! \brief Bit reverse a 16 bit word.
|
||||
\param data The word to be reversed.
|
||||
\return The bit reversed version of data. */
|
||||
uint16_t bit_reverse16(uint16_t data);
|
||||
|
||||
/*! \brief Bit reverse a 32 bit word.
|
||||
\param data The word to be reversed.
|
||||
\return The bit reversed version of data. */
|
||||
uint32_t bit_reverse32(uint32_t data);
|
||||
|
||||
/*! \brief Bit reverse each of the four bytes in a 32 bit word.
|
||||
\param data The word to be reversed.
|
||||
\return The bit reversed version of data. */
|
||||
uint32_t bit_reverse_4bytes(uint32_t data);
|
||||
|
||||
/*! \brief Find the number of set bits in a 32 bit word.
|
||||
\param x The word to be searched.
|
||||
\return The number of set bits. */
|
||||
int one_bits32(uint32_t x);
|
||||
|
||||
/*! \brief Create a mask as wide as the number in a 32 bit word.
|
||||
\param x The word to be searched.
|
||||
\return The mask. */
|
||||
uint32_t make_mask32(uint32_t x);
|
||||
|
||||
/*! \brief Create a mask as wide as the number in a 16 bit word.
|
||||
\param x The word to be searched.
|
||||
\return The mask. */
|
||||
uint16_t make_mask16(uint16_t x);
|
||||
|
||||
/*! \brief Find the least significant one in a word, and return a word
|
||||
with just that bit set.
|
||||
\param x The word to be searched.
|
||||
\return The word with the single set bit. */
|
||||
static __inline__ uint32_t least_significant_one32(uint32_t x)
|
||||
{
|
||||
return (x & (-(int32_t) x));
|
||||
}
|
||||
/*- End of function --------------------------------------------------------*/
|
||||
|
||||
/*! \brief Find the most significant one in a word, and return a word
|
||||
with just that bit set.
|
||||
\param x The word to be searched.
|
||||
\return The word with the single set bit. */
|
||||
static __inline__ uint32_t most_significant_one32(uint32_t x)
|
||||
{
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
return 1 << top_bit(x);
|
||||
#else
|
||||
x = make_mask32(x);
|
||||
return (x ^ (x >> 1));
|
||||
#endif
|
||||
}
|
||||
/*- End of function --------------------------------------------------------*/
|
||||
|
||||
/*! \brief Find the parity of a byte.
|
||||
\param x The byte to be checked.
|
||||
\return 1 for odd, or 0 for even. */
|
||||
static __inline__ int parity8(uint8_t x)
|
||||
{
|
||||
x = (x ^ (x >> 4)) & 0x0F;
|
||||
return (0x6996 >> x) & 1;
|
||||
}
|
||||
/*- End of function --------------------------------------------------------*/
|
||||
|
||||
/*! \brief Find the parity of a 16 bit word.
|
||||
\param x The word to be checked.
|
||||
\return 1 for odd, or 0 for even. */
|
||||
static __inline__ int parity16(uint16_t x)
|
||||
{
|
||||
x ^= (x >> 8);
|
||||
x = (x ^ (x >> 4)) & 0x0F;
|
||||
return (0x6996 >> x) & 1;
|
||||
}
|
||||
/*- End of function --------------------------------------------------------*/
|
||||
|
||||
/*! \brief Find the parity of a 32 bit word.
|
||||
\param x The word to be checked.
|
||||
\return 1 for odd, or 0 for even. */
|
||||
static __inline__ int parity32(uint32_t x)
|
||||
{
|
||||
x ^= (x >> 16);
|
||||
x ^= (x >> 8);
|
||||
x = (x ^ (x >> 4)) & 0x0F;
|
||||
return (0x6996 >> x) & 1;
|
||||
}
|
||||
/*- End of function --------------------------------------------------------*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
/*- End of file ------------------------------------------------------------*/
|
|
@ -0,0 +1,658 @@
|
|||
/*
|
||||
* SpanDSP - a series of DSP components for telephony
|
||||
*
|
||||
* echo.c - A line echo canceller. This code is being developed
|
||||
* against and partially complies with G168.
|
||||
*
|
||||
* Written by Steve Underwood <steveu@coppice.org>
|
||||
* and David Rowe <david_at_rowetel_dot_com>
|
||||
*
|
||||
* Copyright (C) 2001, 2003 Steve Underwood, 2007 David Rowe
|
||||
*
|
||||
* Based on a bit from here, a bit from there, eye of toad, ear of
|
||||
* bat, 15 years of failed attempts by David and a few fried brain
|
||||
* cells.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*
|
||||
* $Id: echo.c,v 1.20 2006/12/01 18:00:48 steveu Exp $
|
||||
*/
|
||||
|
||||
/*! \file */
|
||||
|
||||
/* Implementation Notes
|
||||
David Rowe
|
||||
April 2007
|
||||
|
||||
This code started life as Steve's NLMS algorithm with a tap
|
||||
rotation algorithm to handle divergence during double talk. I
|
||||
added a Geigel Double Talk Detector (DTD) [2] and performed some
|
||||
G168 tests. However I had trouble meeting the G168 requirements,
|
||||
especially for double talk - there were always cases where my DTD
|
||||
failed, for example where near end speech was under the 6dB
|
||||
threshold required for declaring double talk.
|
||||
|
||||
So I tried a two path algorithm [1], which has so far given better
|
||||
results. The original tap rotation/Geigel algorithm is available
|
||||
in SVN http://svn.rowetel.com/software/oslec/tags/before_16bit.
|
||||
It's probably possible to make it work if some one wants to put some
|
||||
serious work into it.
|
||||
|
||||
At present no special treatment is provided for tones, which
|
||||
generally cause NLMS algorithms to diverge. Initial runs of a
|
||||
subset of the G168 tests for tones (e.g ./echo_test 6) show the
|
||||
current algorithm is passing OK, which is kind of surprising. The
|
||||
full set of tests needs to be performed to confirm this result.
|
||||
|
||||
One other interesting change is that I have managed to get the NLMS
|
||||
code to work with 16 bit coefficients, rather than the original 32
|
||||
bit coefficents. This reduces the MIPs and storage required.
|
||||
I evaulated the 16 bit port using g168_tests.sh and listening tests
|
||||
on 4 real-world samples.
|
||||
|
||||
I also attempted the implementation of a block based NLMS update
|
||||
[2] but although this passes g168_tests.sh it didn't converge well
|
||||
on the real-world samples. I have no idea why, perhaps a scaling
|
||||
problem. The block based code is also available in SVN
|
||||
http://svn.rowetel.com/software/oslec/tags/before_16bit. If this
|
||||
code can be debugged, it will lead to further reduction in MIPS, as
|
||||
the block update code maps nicely onto DSP instruction sets (it's a
|
||||
dot product) compared to the current sample-by-sample update.
|
||||
|
||||
Steve also has some nice notes on echo cancellers in echo.h
|
||||
|
||||
|
||||
References:
|
||||
|
||||
[1] Ochiai, Areseki, and Ogihara, "Echo Canceller with Two Echo
|
||||
Path Models", IEEE Transactions on communications, COM-25,
|
||||
No. 6, June
|
||||
1977.
|
||||
http://www.rowetel.com/images/echo/dual_path_paper.pdf
|
||||
|
||||
[2] The classic, very useful paper that tells you how to
|
||||
actually build a real world echo canceller:
|
||||
Messerschmitt, Hedberg, Cole, Haoui, Winship, "Digital Voice
|
||||
Echo Canceller with a TMS320020,
|
||||
http://www.rowetel.com/images/echo/spra129.pdf
|
||||
|
||||
[3] I have written a series of blog posts on this work, here is
|
||||
Part 1: http://www.rowetel.com/blog/?p=18
|
||||
|
||||
[4] The source code http://svn.rowetel.com/software/oslec/
|
||||
|
||||
[5] A nice reference on LMS filters:
|
||||
http://en.wikipedia.org/wiki/Least_mean_squares_filter
|
||||
|
||||
Credits:
|
||||
|
||||
Thanks to Steve Underwood, Jean-Marc Valin, and Ramakrishnan
|
||||
Muthukrishnan for their suggestions and email discussions. Thanks
|
||||
also to those people who collected echo samples for me such as
|
||||
Mark, Pawel, and Pavel.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
#ifdef __KERNEL__
|
||||
#include <linux/kernel.h> /* We're doing kernel work */
|
||||
#include <linux/module.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/slab.h>
|
||||
#define malloc(a) kmalloc((a), GFP_KERNEL)
|
||||
#define free(a) kfree(a)
|
||||
#else
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
#endif
|
||||
|
||||
#include "bit_operations.h"
|
||||
#include "echo.h"
|
||||
|
||||
#if !defined(NULL)
|
||||
#define NULL (void *) 0
|
||||
#endif
|
||||
#if !defined(FALSE)
|
||||
#define FALSE 0
|
||||
#endif
|
||||
#if !defined(TRUE)
|
||||
#define TRUE (!FALSE)
|
||||
#endif
|
||||
|
||||
#define MIN_TX_POWER_FOR_ADAPTION 64
|
||||
#define MIN_RX_POWER_FOR_ADAPTION 64
|
||||
#define DTD_HANGOVER 600 /* 600 samples, or 75ms */
|
||||
#define DC_LOG2BETA 3 /* log2() of DC filter Beta */
|
||||
|
||||
/*-----------------------------------------------------------------------*\
|
||||
|
||||
FUNCTIONS
|
||||
|
||||
\*-----------------------------------------------------------------------*/
|
||||
|
||||
/* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */
|
||||
|
||||
|
||||
#ifdef __BLACKFIN_ASM__
|
||||
static void __inline__ lms_adapt_bg(echo_can_state_t *ec, int clean, int shift)
|
||||
{
|
||||
int i, j;
|
||||
int offset1;
|
||||
int offset2;
|
||||
int factor;
|
||||
int exp;
|
||||
int16_t *phist;
|
||||
int n;
|
||||
|
||||
if (shift > 0)
|
||||
factor = clean << shift;
|
||||
else
|
||||
factor = clean >> -shift;
|
||||
|
||||
/* Update the FIR taps */
|
||||
|
||||
offset2 = ec->curr_pos;
|
||||
offset1 = ec->taps - offset2;
|
||||
phist = &ec->fir_state_bg.history[offset2];
|
||||
|
||||
/* st: and en: help us locate the assembler in echo.s */
|
||||
|
||||
//asm("st:");
|
||||
n = ec->taps;
|
||||
for (i = 0, j = offset2; i < n; i++, j++)
|
||||
{
|
||||
exp = *phist++ * factor;
|
||||
ec->fir_taps16[1][i] += (int16_t) ((exp+(1<<14)) >> 15);
|
||||
}
|
||||
//asm("en:");
|
||||
|
||||
/* Note the asm for the inner loop above generated by Blackfin gcc
|
||||
4.1.1 is pretty good (note even parallel instructions used):
|
||||
|
||||
R0 = W [P0++] (X);
|
||||
R0 *= R2;
|
||||
R0 = R0 + R3 (NS) ||
|
||||
R1 = W [P1] (X) ||
|
||||
nop;
|
||||
R0 >>>= 15;
|
||||
R0 = R0 + R1;
|
||||
W [P1++] = R0;
|
||||
|
||||
A block based update algorithm would be much faster but the
|
||||
above can't be improved on much. Every instruction saved in
|
||||
the loop above is 2 MIPs/ch! The for loop above is where the
|
||||
Blackfin spends most of it's time - about 17 MIPs/ch measured
|
||||
with speedtest.c with 256 taps (32ms). Write-back and
|
||||
Write-through cache gave about the same performance.
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
IDEAS for further optimisation of lms_adapt_bg():
|
||||
|
||||
1/ The rounding is quite costly. Could we keep as 32 bit coeffs
|
||||
then make filter pluck the MS 16-bits of the coeffs when filtering?
|
||||
However this would lower potential optimisation of filter, as I
|
||||
think the dual-MAC architecture requires packed 16 bit coeffs.
|
||||
|
||||
2/ Block based update would be more efficient, as per comments above,
|
||||
could use dual MAC architecture.
|
||||
|
||||
3/ Look for same sample Blackfin LMS code, see if we can get dual-MAC
|
||||
packing.
|
||||
|
||||
4/ Execute the whole e/c in a block of say 20ms rather than sample
|
||||
by sample. Processing a few samples every ms is inefficient.
|
||||
*/
|
||||
|
||||
#else
|
||||
static __inline__ void lms_adapt_bg(echo_can_state_t *ec, int clean, int shift)
|
||||
{
|
||||
int i;
|
||||
|
||||
int offset1;
|
||||
int offset2;
|
||||
int factor;
|
||||
int exp;
|
||||
|
||||
if (shift > 0)
|
||||
factor = clean << shift;
|
||||
else
|
||||
factor = clean >> -shift;
|
||||
|
||||
/* Update the FIR taps */
|
||||
|
||||
offset2 = ec->curr_pos;
|
||||
offset1 = ec->taps - offset2;
|
||||
|
||||
for (i = ec->taps - 1; i >= offset1; i--)
|
||||
{
|
||||
exp = (ec->fir_state_bg.history[i - offset1]*factor);
|
||||
ec->fir_taps16[1][i] += (int16_t) ((exp+(1<<14)) >> 15);
|
||||
}
|
||||
for ( ; i >= 0; i--)
|
||||
{
|
||||
exp = (ec->fir_state_bg.history[i + offset2]*factor);
|
||||
ec->fir_taps16[1][i] += (int16_t) ((exp+(1<<14)) >> 15);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*- End of function --------------------------------------------------------*/
|
||||
|
||||
echo_can_state_t *echo_can_create(int len, int adaption_mode)
|
||||
{
|
||||
echo_can_state_t *ec;
|
||||
int i;
|
||||
int j;
|
||||
|
||||
ec = (echo_can_state_t *) malloc(sizeof(*ec));
|
||||
if (ec == NULL)
|
||||
return NULL;
|
||||
memset(ec, 0, sizeof(*ec));
|
||||
|
||||
ec->taps = len;
|
||||
ec->log2taps = top_bit(len);
|
||||
ec->curr_pos = ec->taps - 1;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
if ((ec->fir_taps16[i] = (int16_t *) malloc((ec->taps)*sizeof(int16_t))) == NULL)
|
||||
{
|
||||
for (j = 0; j < i; j++)
|
||||
free(ec->fir_taps16[j]);
|
||||
free(ec);
|
||||
return NULL;
|
||||
}
|
||||
memset(ec->fir_taps16[i], 0, (ec->taps)*sizeof(int16_t));
|
||||
}
|
||||
|
||||
fir16_create(&ec->fir_state,
|
||||
ec->fir_taps16[0],
|
||||
ec->taps);
|
||||
fir16_create(&ec->fir_state_bg,
|
||||
ec->fir_taps16[1],
|
||||
ec->taps);
|
||||
|
||||
for(i=0; i<5; i++) {
|
||||
ec->xvtx[i] = ec->yvtx[i] = ec->xvrx[i] = ec->yvrx[i] = 0;
|
||||
}
|
||||
|
||||
ec->cng_level = 1000;
|
||||
echo_can_adaption_mode(ec, adaption_mode);
|
||||
|
||||
ec->snapshot = (int16_t*)malloc(ec->taps*sizeof(int16_t));
|
||||
memset(ec->snapshot, 0, sizeof(int16_t)*ec->taps);
|
||||
|
||||
ec->cond_met = 0;
|
||||
ec->Pstates = 0;
|
||||
ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0;
|
||||
ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0;
|
||||
ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
|
||||
ec->Lbgn = ec->Lbgn_acc = 0;
|
||||
ec->Lbgn_upper = 200;
|
||||
ec->Lbgn_upper_acc = ec->Lbgn_upper << 13;
|
||||
|
||||
return ec;
|
||||
}
|
||||
/*- End of function --------------------------------------------------------*/
|
||||
|
||||
void echo_can_free(echo_can_state_t *ec)
|
||||
{
|
||||
int i;
|
||||
|
||||
fir16_free(&ec->fir_state);
|
||||
fir16_free(&ec->fir_state_bg);
|
||||
for (i = 0; i < 2; i++)
|
||||
free(ec->fir_taps16[i]);
|
||||
free(ec->snapshot);
|
||||
free(ec);
|
||||
}
|
||||
/*- End of function --------------------------------------------------------*/
|
||||
|
||||
void echo_can_adaption_mode(echo_can_state_t *ec, int adaption_mode)
|
||||
{
|
||||
ec->adaption_mode = adaption_mode;
|
||||
}
|
||||
/*- End of function --------------------------------------------------------*/
|
||||
|
||||
void echo_can_flush(echo_can_state_t *ec)
|
||||
{
|
||||
int i;
|
||||
|
||||
ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0;
|
||||
ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0;
|
||||
ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
|
||||
|
||||
ec->Lbgn = ec->Lbgn_acc = 0;
|
||||
ec->Lbgn_upper = 200;
|
||||
ec->Lbgn_upper_acc = ec->Lbgn_upper << 13;
|
||||
|
||||
ec->nonupdate_dwell = 0;
|
||||
|
||||
fir16_flush(&ec->fir_state);
|
||||
fir16_flush(&ec->fir_state_bg);
|
||||
ec->fir_state.curr_pos = ec->taps - 1;
|
||||
ec->fir_state_bg.curr_pos = ec->taps - 1;
|
||||
for (i = 0; i < 2; i++)
|
||||
memset(ec->fir_taps16[i], 0, ec->taps*sizeof(int16_t));
|
||||
|
||||
ec->curr_pos = ec->taps - 1;
|
||||
ec->Pstates = 0;
|
||||
}
|
||||
/*- End of function --------------------------------------------------------*/
|
||||
|
||||
void echo_can_snapshot(echo_can_state_t *ec) {
|
||||
memcpy(ec->snapshot, ec->fir_taps16[0], ec->taps*sizeof(int16_t));
|
||||
}
|
||||
/*- End of function --------------------------------------------------------*/
|
||||
|
||||
/* Dual Path Echo Canceller ------------------------------------------------*/
|
||||
|
||||
int16_t echo_can_update(echo_can_state_t *ec, int16_t tx, int16_t rx)
|
||||
{
|
||||
int32_t echo_value;
|
||||
int clean_bg;
|
||||
int tmp, tmp1;
|
||||
|
||||
/* Input scaling was found be required to prevent problems when tx
|
||||
starts clipping. Another possible way to handle this would be the
|
||||
filter coefficent scaling. */
|
||||
|
||||
ec->tx = tx; ec->rx = rx;
|
||||
tx >>=1;
|
||||
rx >>=1;
|
||||
|
||||
/*
|
||||
Filter DC, 3dB point is 160Hz (I think), note 32 bit precision required
|
||||
otherwise values do not track down to 0. Zero at DC, Pole at (1-Beta)
|
||||
only real axis. Some chip sets (like Si labs) don't need
|
||||
this, but something like a $10 X100P card does. Any DC really slows
|
||||
down convergence.
|
||||
|
||||
Note: removes some low frequency from the signal, this reduces
|
||||
the speech quality when listening to samples through headphones
|
||||
but may not be obvious through a telephone handset.
|
||||
|
||||
Note that the 3dB frequency in radians is approx Beta, e.g. for
|
||||
Beta = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz.
|
||||
*/
|
||||
|
||||
if (ec->adaption_mode & ECHO_CAN_USE_RX_HPF) {
|
||||
tmp = rx << 15;
|
||||
#if 1
|
||||
/* Make sure the gain of the HPF is 1.0. This can still saturate a little under
|
||||
impulse conditions, and it might roll to 32768 and need clipping on sustained peak
|
||||
level signals. However, the scale of such clipping is small, and the error due to
|
||||
any saturation should not markedly affect the downstream processing. */
|
||||
tmp -= (tmp >> 4);
|
||||
#endif
|
||||
ec->rx_1 += -(ec->rx_1>>DC_LOG2BETA) + tmp - ec->rx_2;
|
||||
|
||||
/* hard limit filter to prevent clipping. Note that at this stage
|
||||
rx should be limited to +/- 16383 due to right shift above */
|
||||
tmp1 = ec->rx_1 >> 15;
|
||||
if (tmp1 > 16383) tmp1 = 16383;
|
||||
if (tmp1 < -16383) tmp1 = -16383;
|
||||
rx = tmp1;
|
||||
ec->rx_2 = tmp;
|
||||
}
|
||||
|
||||
/* Block average of power in the filter states. Used for
|
||||
adaption power calculation. */
|
||||
|
||||
{
|
||||
int new, old;
|
||||
|
||||
/* efficient "out with the old and in with the new" algorithm so
|
||||
we don't have to recalculate over the whole block of
|
||||
samples. */
|
||||
new = (int)tx * (int)tx;
|
||||
old = (int)ec->fir_state.history[ec->fir_state.curr_pos] *
|
||||
(int)ec->fir_state.history[ec->fir_state.curr_pos];
|
||||
ec->Pstates += ((new - old) + (1<<(ec->log2taps-1))) >> ec->log2taps;
|
||||
if (ec->Pstates < 0) ec->Pstates = 0;
|
||||
}
|
||||
|
||||
/* Calculate short term average levels using simple single pole IIRs */
|
||||
|
||||
ec->Ltxacc += abs(tx) - ec->Ltx;
|
||||
ec->Ltx = (ec->Ltxacc + (1<<4)) >> 5;
|
||||
ec->Lrxacc += abs(rx) - ec->Lrx;
|
||||
ec->Lrx = (ec->Lrxacc + (1<<4)) >> 5;
|
||||
|
||||
/* Foreground filter ---------------------------------------------------*/
|
||||
|
||||
ec->fir_state.coeffs = ec->fir_taps16[0];
|
||||
echo_value = fir16(&ec->fir_state, tx);
|
||||
ec->clean = rx - echo_value;
|
||||
ec->Lcleanacc += abs(ec->clean) - ec->Lclean;
|
||||
ec->Lclean = (ec->Lcleanacc + (1<<4)) >> 5;
|
||||
|
||||
/* Background filter ---------------------------------------------------*/
|
||||
|
||||
echo_value = fir16(&ec->fir_state_bg, tx);
|
||||
clean_bg = rx - echo_value;
|
||||
ec->Lclean_bgacc += abs(clean_bg) - ec->Lclean_bg;
|
||||
ec->Lclean_bg = (ec->Lclean_bgacc + (1<<4)) >> 5;
|
||||
|
||||
/* Background Filter adaption -----------------------------------------*/
|
||||
|
||||
/* Almost always adap bg filter, just simple DT and energy
|
||||
detection to minimise adaption in cases of strong double talk.
|
||||
However this is not critical for the dual path algorithm.
|
||||
*/
|
||||
ec->factor = 0;
|
||||
ec->shift = 0;
|
||||
if ((ec->nonupdate_dwell == 0)) {
|
||||
int P, logP, shift;
|
||||
|
||||
/* Determine:
|
||||
|
||||
f = Beta * clean_bg_rx/P ------ (1)
|
||||
|
||||
where P is the total power in the filter states.
|
||||
|
||||
The Boffins have shown that if we obey (1) we converge
|
||||
quickly and avoid instability.
|
||||
|
||||
The correct factor f must be in Q30, as this is the fixed
|
||||
point format required by the lms_adapt_bg() function,
|
||||
therefore the scaled version of (1) is:
|
||||
|
||||
(2^30) * f = (2^30) * Beta * clean_bg_rx/P
|
||||
factor = (2^30) * Beta * clean_bg_rx/P ----- (2)
|
||||
|
||||
We have chosen Beta = 0.25 by experiment, so:
|
||||
|
||||
factor = (2^30) * (2^-2) * clean_bg_rx/P
|
||||
|
||||
(30 - 2 - log2(P))
|
||||
factor = clean_bg_rx 2 ----- (3)
|
||||
|
||||
To avoid a divide we approximate log2(P) as top_bit(P),
|
||||
which returns the position of the highest non-zero bit in
|
||||
P. This approximation introduces an error as large as a
|
||||
factor of 2, but the algorithm seems to handle it OK.
|
||||
|
||||
Come to think of it a divide may not be a big deal on a
|
||||
modern DSP, so its probably worth checking out the cycles
|
||||
for a divide versus a top_bit() implementation.
|
||||
*/
|
||||
|
||||
P = MIN_TX_POWER_FOR_ADAPTION + ec->Pstates;
|
||||
logP = top_bit(P) + ec->log2taps;
|
||||
shift = 30 - 2 - logP;
|
||||
ec->shift = shift;
|
||||
|
||||
lms_adapt_bg(ec, clean_bg, shift);
|
||||
}
|
||||
|
||||
/* very simple DTD to make sure we dont try and adapt with strong
|
||||
near end speech */
|
||||
|
||||
ec->adapt = 0;
|
||||
if ((ec->Lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->Lrx > ec->Ltx))
|
||||
ec->nonupdate_dwell = DTD_HANGOVER;
|
||||
if (ec->nonupdate_dwell)
|
||||
ec->nonupdate_dwell--;
|
||||
|
||||
/* Transfer logic ------------------------------------------------------*/
|
||||
|
||||
/* These conditions are from the dual path paper [1], I messed with
|
||||
them a bit to improve performance. */
|
||||
|
||||
if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) &&
|
||||
(ec->nonupdate_dwell == 0) &&
|
||||
(8*ec->Lclean_bg < 7*ec->Lclean) /* (ec->Lclean_bg < 0.875*ec->Lclean) */ &&
|
||||
(8*ec->Lclean_bg < ec->Ltx) /* (ec->Lclean_bg < 0.125*ec->Ltx) */ )
|
||||
{
|
||||
if (ec->cond_met == 6) {
|
||||
/* BG filter has had better results for 6 consecutive samples */
|
||||
ec->adapt = 1;
|
||||
memcpy(ec->fir_taps16[0], ec->fir_taps16[1], ec->taps*sizeof(int16_t));
|
||||
}
|
||||
else
|
||||
ec->cond_met++;
|
||||
}
|
||||
else
|
||||
ec->cond_met = 0;
|
||||
|
||||
/* Non-Linear Processing ---------------------------------------------------*/
|
||||
|
||||
ec->clean_nlp = ec->clean;
|
||||
if (ec->adaption_mode & ECHO_CAN_USE_NLP)
|
||||
{
|
||||
/* Non-linear processor - a fancy way to say "zap small signals, to avoid
|
||||
residual echo due to (uLaw/ALaw) non-linearity in the channel.". */
|
||||
|
||||
if ((16*ec->Lclean < ec->Ltx))
|
||||
{
|
||||
/* Our e/c has improved echo by at least 24 dB (each factor of 2 is 6dB,
|
||||
so 2*2*2*2=16 is the same as 6+6+6+6=24dB) */
|
||||
if (ec->adaption_mode & ECHO_CAN_USE_CNG)
|
||||
{
|
||||
ec->cng_level = ec->Lbgn;
|
||||
|
||||
/* Very elementary comfort noise generation. Just random
|
||||
numbers rolled off very vaguely Hoth-like. DR: This
|
||||
noise doesn't sound quite right to me - I suspect there
|
||||
are some overlfow issues in the filtering as it's too
|
||||
"crackly". TODO: debug this, maybe just play noise at
|
||||
high level or look at spectrum.
|
||||
*/
|
||||
|
||||
ec->cng_rndnum = 1664525U*ec->cng_rndnum + 1013904223U;
|
||||
ec->cng_filter = ((ec->cng_rndnum & 0xFFFF) - 32768 + 5*ec->cng_filter) >> 3;
|
||||
ec->clean_nlp = (ec->cng_filter*ec->cng_level*8) >> 14;
|
||||
|
||||
}
|
||||
else if (ec->adaption_mode & ECHO_CAN_USE_CLIP)
|
||||
{
|
||||
/* This sounds much better than CNG */
|
||||
if (ec->clean_nlp > ec->Lbgn)
|
||||
ec->clean_nlp = ec->Lbgn;
|
||||
if (ec->clean_nlp < -ec->Lbgn)
|
||||
ec->clean_nlp = -ec->Lbgn;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* just mute the residual, doesn't sound very good, used mainly
|
||||
in G168 tests */
|
||||
ec->clean_nlp = 0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Background noise estimator. I tried a few algorithms
|
||||
here without much luck. This very simple one seems to
|
||||
work best, we just average the level using a slow (1 sec
|
||||
time const) filter if the current level is less than a
|
||||
(experimentally derived) constant. This means we dont
|
||||
include high level signals like near end speech. When
|
||||
combined with CNG or especially CLIP seems to work OK.
|
||||
*/
|
||||
if (ec->Lclean < 40) {
|
||||
ec->Lbgn_acc += abs(ec->clean) - ec->Lbgn;
|
||||
ec->Lbgn = (ec->Lbgn_acc + (1<<11)) >> 12;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Roll around the taps buffer */
|
||||
if (ec->curr_pos <= 0)
|
||||
ec->curr_pos = ec->taps;
|
||||
ec->curr_pos--;
|
||||
|
||||
if (ec->adaption_mode & ECHO_CAN_DISABLE)
|
||||
ec->clean_nlp = rx;
|
||||
|
||||
/* Output scaled back up again to match input scaling */
|
||||
|
||||
return (int16_t) ec->clean_nlp << 1;
|
||||
}
|
||||
|
||||
/*- End of function --------------------------------------------------------*/
|
||||
|
||||
/* This function is seperated from the echo canceller is it is usually called
|
||||
as part of the tx process. See rx HP (DC blocking) filter above, it's
|
||||
the same design.
|
||||
|
||||
Some soft phones send speech signals with a lot of low frequency
|
||||
energy, e.g. down to 20Hz. This can make the hybrid non-linear
|
||||
which causes the echo canceller to fall over. This filter can help
|
||||
by removing any low frequency before it gets to the tx port of the
|
||||
hybrid.
|
||||
|
||||
It can also help by removing and DC in the tx signal. DC is bad
|
||||
for LMS algorithms.
|
||||
|
||||
This is one of the classic DC removal filters, adjusted to provide sufficient
|
||||
bass rolloff to meet the above requirement to protect hybrids from things that
|
||||
upset them. The difference between successive samples produces a lousy HPF, and
|
||||
then a suitably placed pole flattens things out. The final result is a nicely
|
||||
rolled off bass end. The filtering is implemented with extended fractional
|
||||
precision, which noise shapes things, giving very clean DC removal.
|
||||
*/
|
||||
|
||||
int16_t echo_can_hpf_tx(echo_can_state_t *ec, int16_t tx) {
|
||||
int tmp, tmp1;
|
||||
|
||||
if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) {
|
||||
tmp = tx << 15;
|
||||
#if 1
|
||||
/* Make sure the gain of the HPF is 1.0. The first can still saturate a little under
|
||||
impulse conditions, and it might roll to 32768 and need clipping on sustained peak
|
||||
level signals. However, the scale of such clipping is small, and the error due to
|
||||
any saturation should not markedly affect the downstream processing. */
|
||||
tmp -= (tmp >> 4);
|
||||
#endif
|
||||
ec->tx_1 += -(ec->tx_1>>DC_LOG2BETA) + tmp - ec->tx_2;
|
||||
tmp1 = ec->tx_1 >> 15;
|
||||
if (tmp1 > 32767) tmp1 = 32767;
|
||||
if (tmp1 < -32767) tmp1 = -32767;
|
||||
tx = tmp1;
|
||||
ec->tx_2 = tmp;
|
||||
}
|
||||
|
||||
return tx;
|
||||
}
|
||||
|
||||
/*- End of function --------------------------------------------------------*/
|
||||
/*- End of file ------------------------------------------------------------*/
|
|
@ -0,0 +1,230 @@
|
|||
/*
|
||||
* SpanDSP - a series of DSP components for telephony
|
||||
*
|
||||
* echo.c - A line echo canceller. This code is being developed
|
||||
* against and partially complies with G168.
|
||||
*
|
||||
* Written by Steve Underwood <steveu@coppice.org>
|
||||
* and David Rowe <david_at_rowetel_dot_com>
|
||||
*
|
||||
* Copyright (C) 2001 Steve Underwood and 2007 David Rowe
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*
|
||||
* $Id: echo.h,v 1.9 2006/10/24 13:45:28 steveu Exp $
|
||||
*/
|
||||
|
||||
/*! \file */
|
||||
|
||||
#if !defined(_ECHO_H_)
|
||||
#define _ECHO_H_
|
||||
|
||||
/*! \page echo_can_page Line echo cancellation for voice
|
||||
|
||||
\section echo_can_page_sec_1 What does it do?
|
||||
This module aims to provide G.168-2002 compliant echo cancellation, to remove
|
||||
electrical echoes (e.g. from 2-4 wire hybrids) from voice calls.
|
||||
|
||||
\section echo_can_page_sec_2 How does it work?
|
||||
The heart of the echo cancellor is FIR filter. This is adapted to match the echo
|
||||
impulse response of the telephone line. It must be long enough to adequately cover
|
||||
the duration of that impulse response. The signal transmitted to the telephone line
|
||||
is passed through the FIR filter. Once the FIR is properly adapted, the resulting
|
||||
output is an estimate of the echo signal received from the line. This is subtracted
|
||||
from the received signal. The result is an estimate of the signal which originated
|
||||
at the far end of the line, free from echos of our own transmitted signal.
|
||||
|
||||
The least mean squares (LMS) algorithm is attributed to Widrow and Hoff, and was
|
||||
introduced in 1960. It is the commonest form of filter adaption used in things
|
||||
like modem line equalisers and line echo cancellers. There it works very well.
|
||||
However, it only works well for signals of constant amplitude. It works very poorly
|
||||
for things like speech echo cancellation, where the signal level varies widely.
|
||||
This is quite easy to fix. If the signal level is normalised - similar to applying
|
||||
AGC - LMS can work as well for a signal of varying amplitude as it does for a modem
|
||||
signal. This normalised least mean squares (NLMS) algorithm is the commonest one used
|
||||
for speech echo cancellation. Many other algorithms exist - e.g. RLS (essentially
|
||||
the same as Kalman filtering), FAP, etc. Some perform significantly better than NLMS.
|
||||
However, factors such as computational complexity and patents favour the use of NLMS.
|
||||
|
||||
A simple refinement to NLMS can improve its performance with speech. NLMS tends
|
||||
to adapt best to the strongest parts of a signal. If the signal is white noise,
|
||||
the NLMS algorithm works very well. However, speech has more low frequency than
|
||||
high frequency content. Pre-whitening (i.e. filtering the signal to flatten
|
||||
its spectrum) the echo signal improves the adapt rate for speech, and ensures the
|
||||
final residual signal is not heavily biased towards high frequencies. A very low
|
||||
complexity filter is adequate for this, so pre-whitening adds little to the
|
||||
compute requirements of the echo canceller.
|
||||
|
||||
An FIR filter adapted using pre-whitened NLMS performs well, provided certain
|
||||
conditions are met:
|
||||
|
||||
- The transmitted signal has poor self-correlation.
|
||||
- There is no signal being generated within the environment being cancelled.
|
||||
|
||||
The difficulty is that neither of these can be guaranteed.
|
||||
|
||||
If the adaption is performed while transmitting noise (or something fairly noise
|
||||
like, such as voice) the adaption works very well. If the adaption is performed
|
||||
while transmitting something highly correlative (typically narrow band energy
|
||||
such as signalling tones or DTMF), the adaption can go seriously wrong. The reason
|
||||
is there is only one solution for the adaption on a near random signal - the impulse
|
||||
response of the line. For a repetitive signal, there are any number of solutions
|
||||
which converge the adaption, and nothing guides the adaption to choose the generalised
|
||||
one. Allowing an untrained canceller to converge on this kind of narrowband
|
||||
energy probably a good thing, since at least it cancels the tones. Allowing a well
|
||||
converged canceller to continue converging on such energy is just a way to ruin
|
||||
its generalised adaption. A narrowband detector is needed, so adapation can be
|
||||
suspended at appropriate times.
|
||||
|
||||
The adaption process is based on trying to eliminate the received signal. When
|
||||
there is any signal from within the environment being cancelled it may upset the
|
||||
adaption process. Similarly, if the signal we are transmitting is small, noise
|
||||
may dominate and disturb the adaption process. If we can ensure that the
|
||||
adaption is only performed when we are transmitting a significant signal level,
|
||||
and the environment is not, things will be OK. Clearly, it is easy to tell when
|
||||
we are sending a significant signal. Telling, if the environment is generating a
|
||||
significant signal, and doing it with sufficient speed that the adaption will
|
||||
not have diverged too much more we stop it, is a little harder.
|
||||
|
||||
The key problem in detecting when the environment is sourcing significant energy
|
||||
is that we must do this very quickly. Given a reasonably long sample of the
|
||||
received signal, there are a number of strategies which may be used to assess
|
||||
whether that signal contains a strong far end component. However, by the time
|
||||
that assessment is complete the far end signal will have already caused major
|
||||
mis-convergence in the adaption process. An assessment algorithm is needed which
|
||||
produces a fairly accurate result from a very short burst of far end energy.
|
||||
|
||||
\section echo_can_page_sec_3 How do I use it?
|
||||
The ec |