241 changed files with 16066 additions and 5573 deletions
@ -0,0 +1,9 @@
|
||||
config ECHO |
||||
tristate "Line Echo Canceller support" |
||||
default n |
||||
---help--- |
||||
This driver provides line echo cancelling support for mISDN and |
||||
Zaptel drivers. |
||||
|
||||
To compile this driver as a module, choose M here. The module |
||||
will be called echo. |
@ -0,0 +1,10 @@
|
||||
TODO: |
||||
- checkpatch.pl cleanups |
||||
- Lindent |
||||
- typedef removals |
||||
- handle bit_operations.h (merge in or make part of common code?) |
||||
- remove proc interface, only use echo.h interface (proc interface is |
||||
racy and not correct.) |
||||
|
||||
Please send patches to Greg Kroah-Hartman <greg@kroah.com> and Cc: Steve |
||||
Underwood <steveu@coppice.org> and David Rowe <david@rowetel.com> |
@ -0,0 +1,228 @@
|
||||
/*
|
||||
* SpanDSP - a series of DSP components for telephony |
||||
* |
||||
* bit_operations.h - Various bit level operations, such as bit reversal |
||||
* |
||||
* Written by Steve Underwood <steveu@coppice.org> |
||||
* |
||||
* Copyright (C) 2006 Steve Underwood |
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* This program is free software; you can redistribute it and/or modify |
||||
* it under the terms of the GNU General Public License version 2, as |
||||
* published by the Free Software Foundation. |
||||
* |
||||
* This program is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
* GNU General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU General Public License |
||||
* along with this program; if not, write to the Free Software |
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
||||
* |
||||
* $Id: bit_operations.h,v 1.11 2006/11/28 15:37:03 steveu Exp $ |
||||
*/ |
||||
|
||||
/*! \file */ |
||||
|
||||
#if !defined(_BIT_OPERATIONS_H_) |
||||
#define _BIT_OPERATIONS_H_ |
||||
|
||||
#if defined(__i386__) || defined(__x86_64__) |
||||
/*! \brief Find the bit position of the highest set bit in a word
    \param bits The word to be searched
    \return The bit number of the highest set bit, or -1 if the word is zero. */
static __inline__ int top_bit(unsigned int bits)
{
	/*
	 * The previous inline asm preloaded -1 and relied on BSR leaving the
	 * destination untouched for a zero source.  The architecture manual
	 * documents the destination as undefined in that case, so test for
	 * zero explicitly and let the compiler pick the instruction.
	 */
	if (bits == 0)
		return -1;

	/* unsigned int is 32 bits wide on i386/x86_64 */
	return 31 - __builtin_clz(bits);
}
||||
|
||||
/*! \brief Find the bit position of the lowest set bit in a word
    \param bits The word to be searched
    \return The bit number of the lowest set bit, or -1 if the word is zero. */
static __inline__ int bottom_bit(unsigned int bits)
{
	/*
	 * The previous inline asm preloaded -1 and relied on BSF leaving the
	 * destination untouched for a zero source.  The architecture manual
	 * documents the destination as undefined in that case, so test for
	 * zero explicitly and let the compiler pick the instruction.
	 */
	if (bits == 0)
		return -1;

	return __builtin_ctz(bits);
}
||||
#else |
||||
/*
 * Portable fallback: locate the highest set bit of a 32 bit word with a
 * five step binary search.  Returns the bit number (0..31), or -1 when
 * the word is zero.
 */
static __inline__ int top_bit(unsigned int bits)
{
	int pos = 0;

	if (!bits)
		return -1;

	/* Each step keeps the upper half when it holds any set bit. */
	if (bits >> 16) {
		bits >>= 16;
		pos += 16;
	}
	if (bits >> 8) {
		bits >>= 8;
		pos += 8;
	}
	if (bits >> 4) {
		bits >>= 4;
		pos += 4;
	}
	if (bits >> 2) {
		bits >>= 2;
		pos += 2;
	}
	if (bits >> 1)
		pos += 1;

	return pos;
}
||||
|
||||
/*
 * Portable fallback: locate the lowest set bit of a 32 bit word with a
 * five step binary search.  Returns the bit number (0..31), or -1 when
 * the word is zero.
 */
static __inline__ int bottom_bit(unsigned int bits)
{
	int i;

	if (bits == 0)
		return -1;

	/*
	 * Start at the highest bit number.  The five steps can subtract at
	 * most 16 + 8 + 4 + 2 + 1 = 31, so starting at 32 (as this code used
	 * to) reported every position one too high: bit 0 came back as 1 and
	 * bit 31 as 32, disagreeing with the x86 implementation.
	 */
	i = 31;
	if (bits & 0x0000FFFF) {
		bits &= 0x0000FFFF;
		i -= 16;
	}
	if (bits & 0x00FF00FF) {
		bits &= 0x00FF00FF;
		i -= 8;
	}
	if (bits & 0x0F0F0F0F) {
		bits &= 0x0F0F0F0F;
		i -= 4;
	}
	if (bits & 0x33333333) {
		bits &= 0x33333333;
		i -= 2;
	}
	if (bits & 0x55555555) {
		bits &= 0x55555555;
		i -= 1;
	}
	return i;
}
||||
#endif |
||||
|
||||
/*! \brief Bit reverse a byte.
    \param x The byte to be reversed.
    \return The bit reversed version of x. */
static __inline__ uint8_t bit_reverse8(uint8_t x)
{
#if defined(__i386__) || defined(__x86_64__)
	/* Multiply is fast here: spread the bits with two multiplies,
	   then gather them back into one byte with a third. */
	const unsigned int spread_a = (x * 0x0802U) & 0x22110U;
	const unsigned int spread_b = (x * 0x8020U) & 0x88440U;

	return ((spread_a | spread_b) * 0x10101U) >> 16;
#else
	/* Multiply is slow, but we have a barrel shifter: swap nibbles,
	   then bit pairs, then adjacent bits. */
	uint8_t r;

	r = (x >> 4) | (x << 4);
	r = ((r & 0xCC) >> 2) | ((r & 0x33) << 2);
	r = ((r & 0xAA) >> 1) | ((r & 0x55) << 1);
	return r;
#endif
}
||||
|
||||
/*! \brief Bit reverse a 16 bit word.
|
||||
\param data The word to be reversed. |
||||
\return The bit reversed version of data. */ |
||||
uint16_t bit_reverse16(uint16_t data); |
||||
|
||||
/*! \brief Bit reverse a 32 bit word.
|
||||
\param data The word to be reversed. |
||||
\return The bit reversed version of data. */ |
||||
uint32_t bit_reverse32(uint32_t data); |
||||
|
||||
/*! \brief Bit reverse each of the four bytes in a 32 bit word.
|
||||
\param data The word to be reversed. |
||||
\return The bit reversed version of data. */ |
||||
uint32_t bit_reverse_4bytes(uint32_t data); |
||||
|
||||
/*! \brief Find the number of set bits in a 32 bit word.
|
||||
\param x The word to be searched. |
||||
\return The number of set bits. */ |
||||
int one_bits32(uint32_t x); |
||||
|
||||
/*! \brief Create a mask as wide as the number in a 32 bit word.
|
||||
\param x The word to be searched. |
||||
\return The mask. */ |
||||
uint32_t make_mask32(uint32_t x); |
||||
|
||||
/*! \brief Create a mask as wide as the number in a 16 bit word.
|
||||
\param x The word to be searched. |
||||
\return The mask. */ |
||||
uint16_t make_mask16(uint16_t x); |
||||
|
||||
/*! \brief Find the least significant one in a word, and return a word
           with just that bit set.
    \param x The word to be searched.
    \return The word with the single set bit, or 0 if x is zero. */
static __inline__ uint32_t least_significant_one32(uint32_t x)
{
	/*
	 * Form the two's complement in unsigned arithmetic.  Negating
	 * through int32_t overflows (undefined behaviour) when
	 * x == 0x80000000.
	 */
	return x & (~x + 1U);
}
||||
|
||||
/*! \brief Find the most significant one in a word, and return a word
           with just that bit set.
    \param x The word to be searched.
    \return The word with the single set bit, or 0 if x is zero. */
static __inline__ uint32_t most_significant_one32(uint32_t x)
{
	/*
	 * Smear the top set bit into every lower position, then keep only
	 * the highest one.  This stays in unsigned arithmetic, so it avoids
	 * the negative shift count (x == 0) and the signed overflow
	 * (bit 31) of "1 << top_bit(x)", and needs no helper that is only
	 * declared in this header.
	 */
	x |= x >> 1;
	x |= x >> 2;
	x |= x >> 4;
	x |= x >> 8;
	x |= x >> 16;
	return x ^ (x >> 1);
}
||||
|
||||
/*! \brief Find the parity of a byte.
    \param x The byte to be checked.
    \return 1 for odd, or 0 for even. */
static __inline__ int parity8(uint8_t x)
{
	/* Fold the high nibble onto the low one; parity is preserved. */
	const unsigned int nibble = (x ^ (x >> 4)) & 0x0F;

	/* 0x6996 is a 16 entry table: bit n holds the parity of n. */
	return (0x6996 >> nibble) & 1;
}
||||
|
||||
/*! \brief Find the parity of a 16 bit word.
    \param x The word to be checked.
    \return 1 for odd, or 0 for even. */
static __inline__ int parity16(uint16_t x)
{
	unsigned int folded = x;

	/* Fold 16 bits to 8, then 8 to 4; each XOR fold preserves parity. */
	folded ^= folded >> 8;
	folded = (folded ^ (folded >> 4)) & 0x0F;

	/* 0x6996 is a 16 entry table: bit n holds the parity of n. */
	return (0x6996 >> folded) & 1;
}
||||
|
||||
/*! \brief Find the parity of a 32 bit word.
    \param x The word to be checked.
    \return 1 for odd, or 0 for even. */
static __inline__ int parity32(uint32_t x)
{
	uint32_t folded = x;

	/* Fold 32 -> 16 -> 8 -> 4 bits; each XOR fold preserves parity. */
	folded ^= folded >> 16;
	folded ^= folded >> 8;
	folded = (folded ^ (folded >> 4)) & 0x0F;

	/* 0x6996 is a 16 entry table: bit n holds the parity of n. */
	return (0x6996 >> folded) & 1;
}
||||
|
||||
#endif |
||||
/*- End of file ------------------------------------------------------------*/ |
@ -0,0 +1,638 @@
|
||||
/*
|
||||
* SpanDSP - a series of DSP components for telephony |
||||
* |
||||
* echo.c - A line echo canceller. This code is being developed |
||||
* against and partially complies with G168. |
||||
* |
||||
* Written by Steve Underwood <steveu@coppice.org> |
||||
* and David Rowe <david_at_rowetel_dot_com> |
||||
* |
||||
* Copyright (C) 2001, 2003 Steve Underwood, 2007 David Rowe |
||||
* |
||||
* Based on a bit from here, a bit from there, eye of toad, ear of |
||||
* bat, 15 years of failed attempts by David and a few fried brain |
||||
* cells. |
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* This program is free software; you can redistribute it and/or modify |
||||
* it under the terms of the GNU General Public License version 2, as |
||||
* published by the Free Software Foundation. |
||||
* |
||||
* This program is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
* GNU General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU General Public License |
||||
* along with this program; if not, write to the Free Software |
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
||||
* |
||||
* $Id: echo.c,v 1.20 2006/12/01 18:00:48 steveu Exp $ |
||||
*/ |
||||
|
||||
/*! \file */ |
||||
|
||||
/* Implementation Notes
|
||||
David Rowe |
||||
April 2007 |
||||
|
||||
This code started life as Steve's NLMS algorithm with a tap |
||||
rotation algorithm to handle divergence during double talk. I |
||||
added a Geigel Double Talk Detector (DTD) [2] and performed some |
||||
G168 tests. However I had trouble meeting the G168 requirements, |
||||
especially for double talk - there were always cases where my DTD |
||||
failed, for example where near end speech was under the 6dB |
||||
threshold required for declaring double talk. |
||||
|
||||
So I tried a two path algorithm [1], which has so far given better |
||||
results. The original tap rotation/Geigel algorithm is available |
||||
in SVN http://svn.rowetel.com/software/oslec/tags/before_16bit.
|
||||
It's probably possible to make it work if some one wants to put some |
||||
serious work into it. |
||||
|
||||
At present no special treatment is provided for tones, which |
||||
generally cause NLMS algorithms to diverge. Initial runs of a |
||||
subset of the G168 tests for tones (e.g ./echo_test 6) show the |
||||
current algorithm is passing OK, which is kind of surprising. The |
||||
full set of tests needs to be performed to confirm this result. |
||||
|
||||
One other interesting change is that I have managed to get the NLMS |
||||
code to work with 16 bit coefficients, rather than the original 32 |
||||
bit coefficients. This reduces the MIPs and storage required. |
||||
I evaluated the 16 bit port using g168_tests.sh and listening tests |
||||
on 4 real-world samples. |
||||
|
||||
I also attempted the implementation of a block based NLMS update |
||||
[2] but although this passes g168_tests.sh it didn't converge well |
||||
on the real-world samples. I have no idea why, perhaps a scaling |
||||
problem. The block based code is also available in SVN |
||||
http://svn.rowetel.com/software/oslec/tags/before_16bit. If this
|
||||
code can be debugged, it will lead to further reduction in MIPS, as |
||||
the block update code maps nicely onto DSP instruction sets (it's a |
||||
dot product) compared to the current sample-by-sample update. |
||||
|
||||
Steve also has some nice notes on echo cancellers in echo.h |
||||
|
||||
References: |
||||
|
||||
[1] Ochiai, Areseki, and Ogihara, "Echo Canceller with Two Echo |
||||
Path Models", IEEE Transactions on communications, COM-25, |
||||
No. 6, June |
||||
1977. |
||||
http://www.rowetel.com/images/echo/dual_path_paper.pdf
|
||||
|
||||
[2] The classic, very useful paper that tells you how to |
||||
actually build a real world echo canceller: |
||||
Messerschmitt, Hedberg, Cole, Haoui, Winship, "Digital Voice |
||||
Echo Canceller with a TMS32020", |
||||
http://www.rowetel.com/images/echo/spra129.pdf
|
||||
|
||||
[3] I have written a series of blog posts on this work, here is |
||||
Part 1: http://www.rowetel.com/blog/?p=18
|
||||
|
||||
[4] The source code http://svn.rowetel.com/software/oslec/
|
||||
|
||||
[5] A nice reference on LMS filters: |
||||
http://en.wikipedia.org/wiki/Least_mean_squares_filter
|
||||
|
||||
Credits: |
||||
|
||||
Thanks to Steve Underwood, Jean-Marc Valin, and Ramakrishnan |
||||
Muthukrishnan for their suggestions and email discussions. Thanks |
||||
also to those people who collected echo samples for me such as |
||||
Mark, Pawel, and Pavel. |
||||
*/ |
||||
|
||||
#include <linux/kernel.h> /* We're doing kernel work */ |
||||
#include <linux/module.h> |
||||
#include <linux/slab.h> |
||||
|
||||
#include "bit_operations.h" |
||||
#include "echo.h" |
||||
|
||||
#define MIN_TX_POWER_FOR_ADAPTION 64 |
||||
#define MIN_RX_POWER_FOR_ADAPTION 64 |
||||
#define DTD_HANGOVER 600 /* 600 samples, or 75ms */ |
||||
#define DC_LOG2BETA 3 /* log2() of DC filter Beta */ |
||||
|
||||
/*-----------------------------------------------------------------------*\
|
||||
FUNCTIONS |
||||
\*-----------------------------------------------------------------------*/ |
||||
|
||||
/* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */ |
||||
|
||||
#ifdef __bfin__ |
||||
static void __inline__ lms_adapt_bg(struct oslec_state *ec, int clean, |
||||
int shift) |
||||
{ |
||||
int i, j; |
||||
int offset1; |
||||
int offset2; |
||||
int factor; |
||||
int exp; |
||||
int16_t *phist; |
||||
int n; |
||||
|
||||
if (shift > 0) |
||||
factor = clean << shift; |
||||
else |
||||
factor = clean >> -shift; |
||||
|
||||
/* Update the FIR taps */ |
||||
|
||||
offset2 = ec->curr_pos; |
||||
offset1 = ec->taps - offset2; |
||||
phist = &ec->fir_state_bg.history[offset2]; |
||||
|
||||
/* st: and en: help us locate the assembler in echo.s */ |
||||
|
||||
//asm("st:");
|
||||
n = ec->taps; |
||||
for (i = 0, j = offset2; i < n; i++, j++) { |
||||
exp = *phist++ * factor; |
||||
ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); |
||||
} |
||||
//asm("en:");
|
||||
|
||||
/* Note the asm for the inner loop above generated by Blackfin gcc
|
||||
4.1.1 is pretty good (note even parallel instructions used): |
||||
|
||||
R0 = W [P0++] (X); |
||||
R0 *= R2; |
||||
R0 = R0 + R3 (NS) || |
||||
R1 = W [P1] (X) || |
||||
nop; |
||||
R0 >>>= 15; |
||||
R0 = R0 + R1; |
||||
W [P1++] = R0; |
||||
|
||||
A block based update algorithm would be much faster but the |
||||
above can't be improved on much. Every instruction saved in |
||||
the loop above is 2 MIPs/ch! The for loop above is where the |
||||
Blackfin spends most of it's time - about 17 MIPs/ch measured |
||||
with speedtest.c with 256 taps (32ms). Write-back and |
||||
Write-through cache gave about the same performance. |
||||
*/ |
||||
} |
||||
|
||||
/*
|
||||
IDEAS for further optimisation of lms_adapt_bg(): |
||||
|
||||
1/ The rounding is quite costly. Could we keep as 32 bit coeffs |
||||
then make filter pluck the MS 16-bits of the coeffs when filtering? |
||||
However this would lower potential optimisation of filter, as I |
||||
think the dual-MAC architecture requires packed 16 bit coeffs. |
||||
|
||||
2/ Block based update would be more efficient, as per comments above, |
||||
could use dual MAC architecture. |
||||
|
||||
3/ Look for same sample Blackfin LMS code, see if we can get dual-MAC |
||||
packing. |
||||
|
||||
4/ Execute the whole e/c in a block of say 20ms rather than sample |
||||
by sample. Processing a few samples every ms is inefficient. |
||||
*/ |
||||
|
||||
#else |
||||
static __inline__ void lms_adapt_bg(struct oslec_state *ec, int clean, |
||||
int shift) |
||||
{ |
||||
int i; |
||||
|
||||
int offset1; |
||||
int offset2; |
||||
int factor; |
||||
int exp; |
||||
|
||||
if (shift > 0) |
||||
factor = clean << shift; |
||||
else |
||||
factor = clean >> -shift; |
||||
|
||||
/* Update the FIR taps */ |
||||
|
||||
offset2 = ec->curr_pos; |
||||
offset1 = ec->taps - offset2; |
||||
|
||||
for (i = ec->taps - 1; i >= offset1; i--) { |
||||
exp = (ec->fir_state_bg.history[i - offset1] * factor); |
||||
ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); |
||||
} |
||||
for (; i >= 0; i--) { |
||||
exp = (ec->fir_state_bg.history[i + offset2] * factor); |
||||
ec->fir_taps16[1][i] += (int16_t) ((exp + (1 << 14)) >> 15); |
||||
} |
||||
} |
||||
#endif |
||||
|
||||
struct oslec_state *oslec_create(int len, int adaption_mode) |
||||
{ |
||||
struct oslec_state *ec; |
||||
int i; |
||||
|
||||
ec = kzalloc(sizeof(*ec), GFP_KERNEL); |
||||
if (!ec) |
||||
return NULL; |
||||
|
||||
ec->taps = len; |
||||
ec->log2taps = top_bit(len); |
||||
ec->curr_pos = ec->taps - 1; |
||||
|
||||
for (i = 0; i < 2; i++) { |
||||
ec->fir_taps16[i] = |
||||
kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL); |
||||
if (!ec->fir_taps16[i]) |
||||
goto error_oom; |
||||
} |
||||
|
||||
fir16_create(&ec->fir_state, ec->fir_taps16[0], ec->taps); |
||||
fir16_create(&ec->fir_state_bg, ec->fir_taps16[1], ec->taps); |
||||
|
||||
for (i = 0; i < 5; i++) { |
||||
ec->xvtx[i] = ec->yvtx[i] = ec->xvrx[i] = ec->yvrx[i] = 0; |
||||
} |
||||
|
||||
ec->cng_level = 1000; |
||||
oslec_adaption_mode(ec, adaption_mode); |
||||
|
||||
ec->snapshot = kcalloc(ec->taps, sizeof(int16_t), GFP_KERNEL); |
||||
if (!ec->snapshot) |
||||
goto error_oom; |
||||
|
||||
ec->cond_met = 0; |
||||
ec->Pstates = 0; |
||||
ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0; |
||||
ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0; |
||||
ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0; |
||||
ec->Lbgn = ec->Lbgn_acc = 0; |
||||
ec->Lbgn_upper = 200; |
||||
ec->Lbgn_upper_acc = ec->Lbgn_upper << 13; |
||||
|
||||
return ec; |
||||
|
||||
error_oom: |
||||
for (i = 0; i < 2; i++) |
||||
kfree(ec->fir_taps16[i]); |
||||
|
||||
kfree(ec); |
||||
return NULL; |
||||
} |
||||
|
||||
EXPORT_SYMBOL_GPL(oslec_create); |
||||
|
||||
void oslec_free(struct oslec_state *ec) |
||||
{ |
||||
int i; |
||||
|
||||
fir16_free(&ec->fir_state); |
||||
fir16_free(&ec->fir_state_bg); |
||||
for (i = 0; i < 2; i++) |
||||
kfree(ec->fir_taps16[i]); |
||||
kfree(ec->snapshot); |
||||
kfree(ec); |
||||
} |
||||
|
||||
EXPORT_SYMBOL_GPL(oslec_free); |
||||
|
||||
void oslec_adaption_mode(struct oslec_state *ec, int adaption_mode) |
||||
{ |
||||
ec->adaption_mode = adaption_mode; |
||||
} |
||||
|
||||
EXPORT_SYMBOL_GPL(oslec_adaption_mode); |
||||
|
||||
void oslec_flush(struct oslec_state *ec) |
||||
{ |
||||
int i; |
||||
|
||||
ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0; |
||||
ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0; |
||||
ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0; |
||||
|
||||
ec->Lbgn = ec->Lbgn_acc = 0; |
||||
ec->Lbgn_upper = 200; |
||||
ec->Lbgn_upper_acc = ec->Lbgn_upper << 13; |
||||
|
||||
ec->nonupdate_dwell = 0; |
||||
|
||||
fir16_flush(&ec->fir_state); |
||||
fir16_flush(&ec->fir_state_bg); |
||||
ec->fir_state.curr_pos = ec->taps - 1; |
||||
ec->fir_state_bg.curr_pos = ec->taps - 1; |
||||
for (i = 0; i < 2; i++) |
||||
memset(ec->fir_taps16[i], 0, ec->taps * sizeof(int16_t)); |
||||
|
||||
ec->curr_pos = ec->taps - 1; |
||||
ec->Pstates = 0; |
||||
} |
||||
|
||||
EXPORT_SYMBOL_GPL(oslec_flush); |
||||
|
||||
void oslec_snapshot(struct oslec_state *ec) |
||||
{ |
||||
memcpy(ec->snapshot, ec->fir_taps16[0], ec->taps * sizeof(int16_t)); |
||||
} |
||||
|
||||
EXPORT_SYMBOL_GPL(oslec_snapshot); |
||||
|
||||
/* Dual Path Echo Canceller ------------------------------------------------*/ |
||||
|
||||
/*
 * Process one sample pair through the dual path echo canceller.
 *
 * ec: canceller state, updated in place.
 * tx: sample being sent towards the hybrid (the echo reference).
 * rx: sample received from the hybrid (near end signal plus echo).
 *
 * Returns the processed rx sample.  Both inputs are halved on entry and
 * the result is doubled again on return.
 *
 * Per call, in order: optional rx DC-blocking HPF, running power and
 * level estimates, foreground and background FIR filtering, background
 * tap adaption, double talk hangover, background -> foreground
 * coefficient transfer, optional non-linear processing, and advancing
 * ec->curr_pos.
 */
int16_t oslec_update(struct oslec_state *ec, int16_t tx, int16_t rx)
{
	int32_t echo_value;
	int clean_bg;
	int tmp, tmp1;

	/* Input scaling was found to be required to prevent problems when tx
	   starts clipping.  Another possible way to handle this would be the
	   filter coefficient scaling. */

	/* keep the unscaled inputs in the state before halving them */
	ec->tx = tx;
	ec->rx = rx;
	tx >>= 1;
	rx >>= 1;

	/*
	   Filter DC, 3dB point is 160Hz (I think), note 32 bit precision required
	   otherwise values do not track down to 0. Zero at DC, Pole at (1-Beta)
	   only real axis.  Some chip sets (like Si labs) don't need
	   this, but something like a $10 X100P card does.  Any DC really slows
	   down convergence.

	   Note: removes some low frequency from the signal, this reduces
	   the speech quality when listening to samples through headphones
	   but may not be obvious through a telephone handset.

	   Note that the 3dB frequency in radians is approx Beta, e.g. for
	   Beta = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz.
	 */

	if (ec->adaption_mode & ECHO_CAN_USE_RX_HPF) {
		tmp = rx << 15;
#if 1
		/* Make sure the gain of the HPF is 1.0. This can still saturate a little under
		   impulse conditions, and it might roll to 32768 and need clipping on sustained peak
		   level signals. However, the scale of such clipping is small, and the error due to
		   any saturation should not markedly affect the downstream processing. */
		tmp -= (tmp >> 4);
#endif
		ec->rx_1 += -(ec->rx_1 >> DC_LOG2BETA) + tmp - ec->rx_2;

		/* hard limit filter to prevent clipping.  Note that at this stage
		   rx should be limited to +/- 16383 due to right shift above */
		tmp1 = ec->rx_1 >> 15;
		if (tmp1 > 16383)
			tmp1 = 16383;
		if (tmp1 < -16383)
			tmp1 = -16383;
		rx = tmp1;
		/* remember the scaled input for the next call's difference term */
		ec->rx_2 = tmp;
	}

	/* Block average of power in the filter states.  Used for
	   adaption power calculation. */

	{
		int new, old;

		/* efficient "out with the old and in with the new" algorithm so
		   we don't have to recalculate over the whole block of
		   samples. */
		new = (int)tx *(int)tx;
		old = (int)ec->fir_state.history[ec->fir_state.curr_pos] *
		    (int)ec->fir_state.history[ec->fir_state.curr_pos];
		ec->Pstates +=
		    ((new - old) + (1 << ec->log2taps)) >> ec->log2taps;
		/* the running estimate is clamped so it never goes negative */
		if (ec->Pstates < 0)
			ec->Pstates = 0;
	}

	/* Calculate short term average levels using simple single pole IIRs */

	ec->Ltxacc += abs(tx) - ec->Ltx;
	ec->Ltx = (ec->Ltxacc + (1 << 4)) >> 5;
	ec->Lrxacc += abs(rx) - ec->Lrx;
	ec->Lrx = (ec->Lrxacc + (1 << 4)) >> 5;

	/* Foreground filter --------------------------------------------------- */

	ec->fir_state.coeffs = ec->fir_taps16[0];
	echo_value = fir16(&ec->fir_state, tx);
	ec->clean = rx - echo_value;
	ec->Lcleanacc += abs(ec->clean) - ec->Lclean;
	ec->Lclean = (ec->Lcleanacc + (1 << 4)) >> 5;

	/* Background filter --------------------------------------------------- */

	echo_value = fir16(&ec->fir_state_bg, tx);
	clean_bg = rx - echo_value;
	ec->Lclean_bgacc += abs(clean_bg) - ec->Lclean_bg;
	ec->Lclean_bg = (ec->Lclean_bgacc + (1 << 4)) >> 5;

	/* Background Filter adaption ----------------------------------------- */

	/* Almost always adapt bg filter, just simple DT and energy
	   detection to minimise adaption in cases of strong double talk.
	   However this is not critical for the dual path algorithm.
	 */
	/* ec->factor is only ever reset to 0 in this function */
	ec->factor = 0;
	ec->shift = 0;
	if ((ec->nonupdate_dwell == 0)) {
		int P, logP, shift;

		/* Determine:

		   f = Beta * clean_bg_rx/P ------ (1)

		   where P is the total power in the filter states.

		   The Boffins have shown that if we obey (1) we converge
		   quickly and avoid instability.

		   The correct factor f must be in Q30, as this is the fixed
		   point format required by the lms_adapt_bg() function,
		   therefore the scaled version of (1) is:

		   (2^30) * f  = (2^30) * Beta * clean_bg_rx/P
		   factor      = (2^30) * Beta * clean_bg_rx/P     ----- (2)

		   We have chosen Beta = 0.25 by experiment, so:

		   factor      = (2^30) * (2^-2) * clean_bg_rx/P

		                              (30 - 2 - log2(P))
		   factor      = clean_bg_rx 2                     ----- (3)

		   To avoid a divide we approximate log2(P) as top_bit(P),
		   which returns the position of the highest non-zero bit in
		   P.  This approximation introduces an error as large as a
		   factor of 2, but the algorithm seems to handle it OK.

		   Come to think of it a divide may not be a big deal on a
		   modern DSP, so it's probably worth checking out the cycles
		   for a divide versus a top_bit() implementation.
		 */

		P = MIN_TX_POWER_FOR_ADAPTION + ec->Pstates;
		logP = top_bit(P) + ec->log2taps;
		shift = 30 - 2 - logP;
		ec->shift = shift;

		lms_adapt_bg(ec, clean_bg, shift);
	}

	/* very simple DTD to make sure we don't try and adapt with strong
	   near end speech */

	ec->adapt = 0;
	if ((ec->Lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->Lrx > ec->Ltx))
		ec->nonupdate_dwell = DTD_HANGOVER;
	if (ec->nonupdate_dwell)
		ec->nonupdate_dwell--;

	/* Transfer logic ------------------------------------------------------ */

	/* These conditions are from the dual path paper [1], I messed with
	   them a bit to improve performance. */

	if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) &&
	    (ec->nonupdate_dwell == 0) &&
	    (8 * ec->Lclean_bg <
	     7 * ec->Lclean) /* (ec->Lclean_bg < 0.875*ec->Lclean) */ &&
	    (8 * ec->Lclean_bg <
	     ec->Ltx) /* (ec->Lclean_bg < 0.125*ec->Ltx) */ ) {
		if (ec->cond_met == 6) {
			/* BG filter has had better results for 6 consecutive samples.
			   cond_met stays at 6 while the conditions keep holding, so
			   the copy below repeats on every such sample. */
			ec->adapt = 1;
			memcpy(ec->fir_taps16[0], ec->fir_taps16[1],
			       ec->taps * sizeof(int16_t));
		} else
			ec->cond_met++;
	} else
		ec->cond_met = 0;

	/* Non-Linear Processing --------------------------------------------------- */

	ec->clean_nlp = ec->clean;
	if (ec->adaption_mode & ECHO_CAN_USE_NLP) {
		/* Non-linear processor - a fancy way to say "zap small signals, to avoid
		   residual echo due to (uLaw/ALaw) non-linearity in the channel.". */

		if ((16 * ec->Lclean < ec->Ltx)) {
			/* Our e/c has improved echo by at least 24 dB (each factor of 2 is 6dB,
			   so 2*2*2*2=16 is the same as 6+6+6+6=24dB) */
			if (ec->adaption_mode & ECHO_CAN_USE_CNG) {
				ec->cng_level = ec->Lbgn;

				/* Very elementary comfort noise generation.  Just random
				   numbers rolled off very vaguely Hoth-like.  DR: This
				   noise doesn't sound quite right to me - I suspect there
				   are some overflow issues in the filtering as it's too
				   "crackly".  TODO: debug this, maybe just play noise at
				   high level or look at spectrum.
				 */

				/* linear congruential generator step */
				ec->cng_rndnum =
				    1664525U * ec->cng_rndnum + 1013904223U;
				ec->cng_filter =
				    ((ec->cng_rndnum & 0xFFFF) - 32768 +
				     5 * ec->cng_filter) >> 3;
				ec->clean_nlp =
				    (ec->cng_filter * ec->cng_level * 8) >> 14;

			} else if (ec->adaption_mode & ECHO_CAN_USE_CLIP) {
				/* This sounds much better than CNG */
				if (ec->clean_nlp > ec->Lbgn)
					ec->clean_nlp = ec->Lbgn;
				if (ec->clean_nlp < -ec->Lbgn)
					ec->clean_nlp = -ec->Lbgn;
			} else {
				/* just mute the residual, doesn't sound very good, used mainly
				   in G168 tests */
				ec->clean_nlp = 0;
			}
		} else {
			/* Background noise estimator.  I tried a few algorithms
			   here without much luck.  This very simple one seems to
			   work best, we just average the level using a slow (1 sec
			   time const) filter if the current level is less than a
			   (experimentally derived) constant.  This means we don't
			   include high level signals like near end speech.  When
			   combined with CNG or especially CLIP seems to work OK.
			 */
			if (ec->Lclean < 40) {
				ec->Lbgn_acc += abs(ec->clean) - ec->Lbgn;
				ec->Lbgn = (ec->Lbgn_acc + (1 << 11)) >> 12;
			}
		}
	}

	/* Roll around the taps buffer */
	if (ec->curr_pos <= 0)
		ec->curr_pos = ec->taps;
	ec->curr_pos--;

	/* When disabled, pass through rx as it stands here (halved on entry
	   and, if enabled, high-pass filtered). */
	if (ec->adaption_mode & ECHO_CAN_DISABLE)
		ec->clean_nlp = rx;

	/* Output scaled back up again to match input scaling */

	/* the cast applies to clean_nlp before the shift, not to the result */
	return (int16_t) ec->clean_nlp << 1;
}
||||
|
||||
EXPORT_SYMBOL_GPL(oslec_update); |
||||
|
||||
/* This function is separated from the echo canceller as it is usually called
|
||||
as part of the tx process. See rx HP (DC blocking) filter above, it's |
||||
the same design. |
||||
|
||||
Some soft phones send speech signals with a lot of low frequency |
||||
energy, e.g. down to 20Hz. This can make the hybrid non-linear |
||||
which causes the echo canceller to fall over. This filter can help |
||||
by removing any low frequency before it gets to the tx port of the |
||||
hybrid. |
||||
|
||||
It can also help by removing any DC in the tx signal. DC is bad |
||||
for LMS algorithms. |
||||
|
||||
This is one of the classic DC removal filters, adjusted to provide sufficient |
||||
bass rolloff to meet the above requirement to protect hybrids from things that |
||||
upset them. The difference between successive samples produces a lousy HPF, and |
||||
then a suitably placed pole flattens things out. The final result is a nicely |
||||
rolled off bass end. The filtering is implemented with extended fractional |
||||
precision, which noise shapes things, giving very clean DC removal. |
||||
*/ |
||||
|
||||
int16_t oslec_hpf_tx(struct oslec_state * ec, int16_t tx) |
||||
{ |
||||
int tmp, tmp1; |
||||
|
||||
if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) { |
||||
tmp = tx << 15; |
||||
#if 1 |
||||
/* Make sure the gain of the HPF is 1.0. The first can still saturate a little under
|
||||
impulse conditions, and it might roll to 32768 and need clipping on sustained peak |
||||
level signals. However, the scale of such clipping is small, and the error due to |
||||
any saturation should not markedly affect the downstream processing. */ |
||||
tmp -= (tmp >> 4); |
||||
#endif |
||||
ec->tx_1 += -(ec->tx_1 >> DC_LOG2BETA) + tmp - ec->tx_2; |
||||
tmp1 = ec->tx_1 >> 15; |
||||
if (tmp1 > 32767) |
||||
tmp1 = 32767; |
||||
if (tmp1 < -32767) |
||||
tmp1 = -32767; |
||||
tx = tmp1; |
||||
ec->tx_2 = tmp; |
||||
} |
||||
|
||||
return tx; |
||||
} |
||||
|
||||
EXPORT_SYMBOL_GPL(oslec_hpf_tx); |
||||
|
||||
MODULE_LICENSE("GPL"); |
||||
MODULE_AUTHOR("David Rowe"); |
||||
MODULE_DESCRIPTION("Open Source Line Echo Canceller"); |
||||
MODULE_VERSION("0.3.0"); |
@ -0,0 +1,172 @@
|
||||
/*
|
||||
* SpanDSP - a series of DSP components for telephony |
||||
* |
||||
* echo.h - A line echo canceller. This code is being developed |
||||
* against and partially complies with G168. |
||||
* |
||||
* Written by Steve Underwood <steveu@coppice.org> |
||||
* and David Rowe <david_at_rowetel_dot_com> |
||||
* |
||||
* Copyright (C) 2001 Steve Underwood and 2007 David Rowe |
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* This program is free software; you can redistribute it and/or modify |
||||
* it under the terms of the GNU General Public License version 2, as |
||||
* published by the Free Software Foundation. |
||||
* |
||||
* This program is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
* GNU General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU General Public License |
||||
* along with this program; if not, write to the Free Software |
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
||||
* |
||||
* $Id: echo.h,v 1.9 2006/10/24 13:45:28 steveu Exp $ |
||||
*/ |
||||
|
||||
#ifndef __ECHO_H |
||||
#define __ECHO_H |
||||
|
||||
/*! \page echo_can_page Line echo cancellation for voice
|
||||
|
||||
\section echo_can_page_sec_1 What does it do? |
||||
This module aims to provide G.168-2002 compliant echo cancellation, to remove |
||||
electrical echoes (e.g. from 2-4 wire hybrids) from voice calls. |
||||
|
||||
\section echo_can_page_sec_2 How does it work? |
||||
The heart of the echo canceller is an FIR filter. This is adapted to match the
echo impulse response of the telephone line. It must be long enough to
adequately cover the duration of that impulse response. The signal transmitted
to the telephone line is passed through the FIR filter. Once the FIR is
properly adapted, the resulting output is an estimate of the echo signal
received from the line. This is subtracted from the received signal. The result
is an estimate of the signal which originated at the far end of the line, free
from echoes of our own transmitted signal.
||||
|
||||
The least mean squares (LMS) algorithm is attributed to Widrow and Hoff, and |
||||
was introduced in 1960. It is the commonest form of filter adaption used in |
||||
things like modem line equalisers and line echo cancellers. There it works very |
||||
well. However, it only works well for signals of constant amplitude. It works |
||||
very poorly for things like speech echo cancellation, where the signal level |
||||
varies widely. This is quite easy to fix. If the signal level is normalised - |
||||
similar to applying AGC - LMS can work as well for a signal of varying |
||||
amplitude as it does for a modem signal. This normalised least mean squares |
||||
(NLMS) algorithm is the commonest one used for speech echo cancellation. Many |
||||
other algorithms exist - e.g. RLS (essentially the same as Kalman filtering), |
||||
FAP, etc. Some perform significantly better than NLMS. However, factors such |
||||
as computational complexity and patents favour the use of NLMS. |
||||
|
||||
A simple refinement to NLMS can improve its performance with speech. NLMS tends |
||||
to adapt best to the strongest parts of a signal. If the signal is white noise, |
||||
the NLMS algorithm works very well. However, speech has more low frequency than |
||||
high frequency content. Pre-whitening (i.e. filtering the signal to flatten its |
||||
spectrum) the echo signal improves the adapt rate for speech, and ensures the |
||||
final residual signal is not heavily biased towards high frequencies. A very |
||||
low complexity filter is adequate for this, so pre-whitening adds little to the |
||||
compute requirements of the echo canceller. |
||||
|
||||
An FIR filter adapted using pre-whitened NLMS performs well, provided certain |
||||
conditions are met: |
||||
|
||||
- The transmitted signal has poor self-correlation. |
||||
- There is no signal being generated within the environment being |
||||
cancelled. |
||||
|
||||
The difficulty is that neither of these can be guaranteed. |
||||
|
||||
If the adaption is performed while transmitting noise (or something fairly
noise like, such as voice) the adaption works very well. If the adaption is
performed while transmitting something highly correlative (typically narrow
band energy such as signalling tones or DTMF), the adaption can go seriously
wrong. The reason is there is only one solution for the adaption on a near
random signal - the impulse response of the line. For a repetitive signal,
there are any number of solutions which converge the adaption, and nothing
guides the adaption to choose the generalised one. Allowing an untrained
canceller to converge on this kind of narrowband energy is probably a good
thing, since at least it cancels the tones. Allowing a well converged canceller
to continue converging on such energy is just a way to ruin its generalised
adaption. A narrowband detector is needed, so adaption can be suspended at
appropriate times.
||||
|
||||
The adaption process is based on trying to eliminate the received signal. When
there is any signal from within the environment being cancelled it may upset
the adaption process. Similarly, if the signal we are transmitting is small,
noise may dominate and disturb the adaption process. If we can ensure that the
adaption is only performed when we are transmitting a significant signal level,
and the environment is not, things will be OK. Clearly, it is easy to tell when
we are sending a significant signal. Telling whether the environment is
generating a significant signal, and doing it with sufficient speed that the
adaption will not have diverged too much before we stop it, is a little harder.
||||
|
||||
The key problem in detecting when the environment is sourcing significant |
||||
energy is that we must do this very quickly. Given a reasonably long sample of |
||||
the received signal, there are a number of strategies which may be used to |
||||
assess whether that signal contains a strong far end component. However, by the |
||||
time that assessment is complete the far end signal will have already caused |
||||
major mis-convergence in the adaption process. An assessment algorithm is |
||||
needed which produces a fairly accurate result from a very short burst of far |
||||
end energy. |
||||
|
||||
\section echo_can_page_sec_3 How do I use it? |
||||
The echo canceller processes both the transmit and receive streams sample by
sample. The processing function is not declared inline. Unfortunately,
cancellation requires many operations per sample, so the call overhead is only
a minor burden.
||||
*/ |
||||
|
||||
#include "fir.h" |
||||
#include "oslec.h" |
||||
|
||||
/*!
|
||||
G.168 echo canceller descriptor. This defines the working state for a line |
||||
echo canceller. |
||||
*/ |
||||
struct oslec_state { |
||||
int16_t tx, rx; |
||||
int16_t clean; |
||||
int16_t clean_nlp; |
||||
|
||||
int nonupdate_dwell; |
||||
int curr_pos; |
||||
int taps; |
||||
int log2taps; |
||||
int adaption_mode; |
||||
|
||||
int cond_met; |
||||
int32_t Pstates; |
||||
int16_t adapt; |
||||
int32_t factor; |
||||
int16_t shift; |
||||
|
||||
/* Average levels and averaging filter states */ |
||||
int Ltxacc, Lrxacc, Lcleanacc, Lclean_bgacc; |
||||
int Ltx, Lrx; |
||||
int Lclean; |
||||
int Lclean_bg; |
||||
int Lbgn, Lbgn_acc, Lbgn_upper, Lbgn_upper_acc; |
||||
|
||||
/* foreground and background filter states */ |
||||
fir16_state_t fir_state; |
||||
fir16_state_t fir_state_bg; |
||||
int16_t *fir_taps16[2]; |
||||
|
||||
/* DC blocking filter states */ |
||||
int tx_1, tx_2, rx_1, rx_2; |
||||
|
||||
/* optional High Pass Filter states */ |
||||
int32_t xvtx[5], yvtx[5]; |
||||
int32_t xvrx[5], yvrx[5]; |
||||
|
||||
/* Parameters for the optional Hoth noise generator */ |
||||
int cng_level; |
||||
int cng_rndnum; |
||||
int cng_filter; |
||||
|
||||
/* snapshot sample of coeffs used for development */ |
||||
int16_t *snapshot; |
||||
}; |
||||
|
||||
#endif /* __ECHO_H */ |
@ -0,0 +1,295 @@
|
||||
/*
|
||||
* SpanDSP - a series of DSP components for telephony |
||||
* |
||||
* fir.h - General telephony FIR routines |
||||
* |
||||
* Written by Steve Underwood <steveu@coppice.org> |
||||
* |
||||
* Copyright (C) 2002 Steve Underwood |
||||
* |
||||
* All rights reserved. |
||||
* |
||||
* This program is free software; you can redistribute it and/or modify |
||||
* it under the terms of the GNU General Public License version 2, as |
||||
* published by the Free Software Foundation. |
||||
* |
||||
* This program is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
* GNU General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU General Public License |
||||
* along with this program; if not, write to the Free Software |
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
||||
* |
||||
* $Id: fir.h,v 1.8 2006/10/24 13:45:28 steveu Exp $ |
||||
*/ |
||||
|
||||
/*! \page fir_page FIR filtering
|
||||
\section fir_page_sec_1 What does it do? |
||||
???. |
||||
|
||||
\section fir_page_sec_2 How does it work? |
||||
???. |
||||
*/ |
||||
|
||||
#if !defined(_FIR_H_) |
||||
#define _FIR_H_ |
||||
|
||||
/*
|
||||
Blackfin NOTES & IDEAS: |
||||
|
||||
A simple dot product function is used to implement the filter. This performs |
||||
just one MAC/cycle which is inefficient but was easy to implement as a first |
||||
pass. The current Blackfin code also uses an unrolled form of the filter |
||||
history to avoid 0 length hardware loop issues. This is wasteful of |
||||
memory. |
||||
|
||||
Ideas for improvement: |
||||
|
||||
1/ Rewrite filter for dual MAC inner loop. The issue here is handling
   history sample offsets that are 16 bit aligned - the dual MAC needs
   32 bit alignment. There are some good examples in libbfdsp.
||||
|
||||
2/ Use the hardware circular buffer facility to halve memory usage.
||||
|
||||
3/ Consider using internal memory. |
||||
|
||||
Using less memory might also improve speed as cache misses will be |
||||
reduced. A drop in MIPs and memory approaching 50% should be |
||||
possible. |
||||
|
||||
The foreground and background filters currently use a total of
about 10 MIPs/ch as measured with speedtest.c on a 256 TAP echo
can.
||||
*/ |
||||
|
||||
#if defined(USE_MMX) || defined(USE_SSE2) |
||||
#include "mmx.h" |
||||
#endif |
||||
|
||||
/*!
    16 bit integer FIR descriptor. This defines the working state for a single
    instance of an FIR filter using 16 bit integer coefficients.
*/
typedef struct {
	/* Number of filter taps, as passed to fir16_create(). */
	int taps;
	/* Position within the history; fir16_create() starts it at taps - 1. */
	int curr_pos;
	/* Caller-supplied coefficients; only the pointer is stored. */
	const int16_t *coeffs;
	/* Sample history buffer allocated (zeroed) by fir16_create(). */
	int16_t *history;
} fir16_state_t;
||||
|
||||
/*!
    32 bit integer FIR descriptor. This defines the working state for a single
    instance of an FIR filter using 32 bit integer coefficients, and filtering
    16 bit integer data.
*/
typedef struct {
	/* Number of filter taps. */
	int taps;
	/* NOTE(review): presumably the current index into history, as for
	 * the 16 bit descriptor; its users are not visible here. */
	int curr_pos;
	/* 32 bit coefficients; read-only through this descriptor. */
	const int32_t *coeffs;
	/* 16 bit sample history. */
	int16_t *history;
} fir32_state_t;
||||
|
||||
/*!
    Floating point FIR descriptor. This defines the working state for a single
    instance of an FIR filter using floating point coefficients and data.
*/
typedef struct {
	/* Number of filter taps. */
	int taps;
	/* NOTE(review): presumably the current index into history, as for
	 * the 16 bit descriptor; its users are not visible here. */
	int curr_pos;
	/* Floating point coefficients; read-only through this descriptor. */
	const float *coeffs;
	/* Floating point sample history. */
	float *history;
} fir_float_state_t;
||||
|
||||
/*
 * fir16_create() - initialise a 16 bit FIR descriptor and allocate its history.
 * @fir:    descriptor to fill in
 * @coeffs: coefficient array; only the pointer is stored, nothing is copied
 * @taps:   number of filter taps
 *
 * Sets the tap count, starts curr_pos at taps - 1 and allocates a zeroed
 * history buffer.  MMX, SSE2 and Blackfin builds get room for twice @taps
 * samples; all other builds get room for @taps samples.
 *
 * Returns the history buffer, or NULL if the allocation failed.
 * NOTE(review): the buffer is presumably released by a matching free routine
 * that is not visible from here.
 */
static inline const int16_t *fir16_create(fir16_state_t *fir,
					  const int16_t *coeffs, int taps)
{
#if defined(USE_MMX) || defined(USE_SSE2) || defined(__bfin__)
	const size_t sample_size = 2 * sizeof(int16_t);
#else
	const size_t sample_size = sizeof(int16_t);
#endif

	fir->taps = taps;
	fir->curr_pos = taps - 1;
	fir->coeffs = coeffs;
	/*
	 * Scale the element size rather than the count: the doubling is then
	 * done in size_t and stays covered by kcalloc()'s overflow check,
	 * instead of being computed as "2 * taps" in int beforehand.
	 */
	fir->history = kcalloc(taps, sample_size, GFP_KERNEL);
	return fir->history;
}
||||
|
||||
static __inline__ void fir16_flush(fir16_state_t * fir) |
||||
{ |
||||
#if defined(USE_MMX) || defined(USE_SSE2) || defined(__bfin__) |
||||
memset(fir->history, 0, 2 * fir->taps * sizeof(int16_t)); |
||||
#else |
||||