freeswitch/libs/libcodec2/src/quantise.c

869 lines
23 KiB
C

/*---------------------------------------------------------------------------*\
FILE........: quantise.c
AUTHOR......: David Rowe
DATE CREATED: 31/5/92
Quantisation functions for the sinusoidal coder.
\*---------------------------------------------------------------------------*/
/*
All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License version 2.1, as
published by the Free Software Foundation. This program is
distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "defines.h"
#include "dump.h"
#include "quantise.h"
#include "lpc.h"
#include "lsp.h"
#include "four1.h"
#include "codebook.h"
#define LSP_DELTA1 0.01 /* grid spacing for LSP root searches */
#define MAX_CB 20 /* max number of codebooks */
/* describes each codebook */
typedef struct {
int k; /* dimension of vector */
int log2m; /* number of bits in m */
int m; /* elements in codebook */
float *fn; /* file name of text file storing the VQ */
} LSP_CB;
/* lsp_q describes entire quantiser made up of several codebooks */
#ifdef OLDER
/* 10+10+6+6 = 32 bit LSP difference split VQ */
LSP_CB lsp_q[] = {
{3, 1024, "/usr/src/freeswitch/libs/libcodec2-1.0/unittest/lspd123.txt"},
{3, 1024, "/usr/src/freeswitch/libs/libcodec2-1.0/unittest/lspd456.txt"},
{2, 64, "/usr/src/freeswitch/libs/libcodec2-1.0/unittest/lspd78.txt"},
{2, 64, "/usr/src/freeswitch/libs/libcodec2-1.0/unittest/lspd910.txt"},
{0, 0, ""}
};
#endif
LSP_CB lsp_q[] = {
{1,4,16, codebook_lsp1 },
{1,4,16, codebook_lsp2 },
{1,4,16, codebook_lsp3 },
{1,4,16, codebook_lsp4 },
{1,4,16, codebook_lsp5 },
{1,4,16, codebook_lsp6 },
{1,4,16, codebook_lsp7 },
{1,3,8, codebook_lsp8 },
{1,3,8, codebook_lsp9 },
{1,2,4, codebook_lsp10 },
{0,0,0, NULL },
};
/* ptr to each codebook */
static float *plsp_cb[MAX_CB];
/*---------------------------------------------------------------------------*\
FUNCTION HEADERS
\*---------------------------------------------------------------------------*/
float speech_to_uq_lsps(float lsp[], float ak[], float Sn[], float w[],
int order);
/*---------------------------------------------------------------------------*\
FUNCTIONS
\*---------------------------------------------------------------------------*/
int lsp_bits(int i) {
return lsp_q[i].log2m;
}
/*---------------------------------------------------------------------------*\
quantise_uniform
Simulates uniform quantising of a float.
\*---------------------------------------------------------------------------*/
void quantise_uniform(float *val, float min, float max, int bits)
{
int levels = 1 << (bits-1);
float norm;
int index;
/* hard limit to quantiser range */
printf("min: %f max: %f val: %f ", min, max, val[0]);
if (val[0] < min) val[0] = min;
if (val[0] > max) val[0] = max;
norm = (*val - min)/(max-min);
printf("%f norm: %f ", val[0], norm);
index = fabs(levels*norm + 0.5);
*val = min + index*(max-min)/levels;
printf("index %d val_: %f\n", index, val[0]);
}
/*---------------------------------------------------------------------------*\
lspd_quantise
Simulates differential lsp quantiser
\*---------------------------------------------------------------------------*/
void lsp_quantise(
float lsp[],
float lsp_[],
int order
)
{
int i;
float dlsp[LPC_MAX];
float dlsp_[LPC_MAX];
dlsp[0] = lsp[0];
for(i=1; i<order; i++)
dlsp[i] = lsp[i] - lsp[i-1];
for(i=0; i<order; i++)
dlsp_[i] = dlsp[i];
quantise_uniform(&dlsp_[0], 0.1, 0.5, 5);
lsp_[0] = dlsp_[0];
for(i=1; i<order; i++)
lsp_[i] = lsp_[i-1] + dlsp_[i];
}
/*---------------------------------------------------------------------------*\
scan_line()
This function reads a vector of floats from a line in a text file.
\*---------------------------------------------------------------------------*/
void scan_line(FILE *fp, float f[], int n)
/* FILE *fp; file ptr to text file */
/* float f[]; array of floats to return */
/* int n; number of floats in line */
{
char s[MAX_STR];
char *ps,*pe;
int i;
fgets(s,MAX_STR,fp);
ps = pe = s;
for(i=0; i<n; i++) {
while( isspace(*pe)) pe++;
while( !isspace(*pe)) pe++;
sscanf(ps,"%f",&f[i]);
ps = pe;
}
}
/*---------------------------------------------------------------------------*\
load_cb
Loads a single codebook (LSP vector quantiser) into memory.
\*---------------------------------------------------------------------------*/
void load_cb(float *source, float *cb, int k, int m)
{
int lines;
int i;
lines = 0;
for(i=0; i<m; i++) {
cb[k*lines++] = source[i];
}
}
/*---------------------------------------------------------------------------*\
quantise_init
Loads the entire LSP quantiser comprised of several vector quantisers
(codebooks).
\*---------------------------------------------------------------------------*/
void quantise_init()
{
int i,k,m;
i = 0;
while(lsp_q[i].k) {
k = lsp_q[i].k;
m = lsp_q[i].m;
plsp_cb[i] = (float*)malloc(sizeof(float)*k*m);
assert(plsp_cb[i] != NULL);
load_cb(lsp_q[i].fn, plsp_cb[i], k, m);
i++;
assert(i < MAX_CB);
}
}
/*---------------------------------------------------------------------------*\
quantise
Quantises vec by choosing the nearest vector in codebook cb, and
returns the vector index. The squared error of the quantised vector
is added to se.
\*---------------------------------------------------------------------------*/
long quantise(float cb[], float vec[], float w[], int k, int m, float *se)
/* float cb[][K]; current VQ codebook */
/* float vec[]; vector to quantise */
/* float w[]; weighting vector */
/* int k; dimension of vectors */
/* int m; size of codebook */
/* float *se; accumulated squared error */
{
float e; /* current error */
long besti; /* best index so far */
float beste; /* best error so far */
long j;
int i;
besti = 0;
beste = 1E32;
for(j=0; j<m; j++) {
e = 0.0;
for(i=0; i<k; i++)
e += pow((cb[j*k+i]-vec[i])*w[i],2.0);
if (e < beste) {
beste = e;
besti = j;
}
}
*se += beste;
return(besti);
}
static float gmin=PI;
float get_gmin(void) { return gmin; }
void min_lsp_dist(float lsp[], int order)
{
int i;
for(i=1; i<order; i++)
if ((lsp[i]-lsp[i-1]) < gmin)
gmin = lsp[i]-lsp[i-1];
}
void check_lsp_order(float lsp[], int lpc_order)
{
int i;
float tmp;
for(i=1; i<lpc_order; i++)
if (lsp[i] < lsp[i-1]) {
printf("swap %d\n",i);
tmp = lsp[i-1];
lsp[i-1] = lsp[i]-0.05;
lsp[i] = tmp+0.05;
}
}
void force_min_lsp_dist(float lsp[], int lpc_order)
{
int i;
for(i=1; i<lpc_order; i++)
if ((lsp[i]-lsp[i-1]) < 0.01) {
lsp[i] += 0.01;
}
}
/*---------------------------------------------------------------------------*\
lpc_model_amplitudes
Derive a LPC model for amplitude samples then estimate amplitude samples
from this model with optional LSP quantisation.
Returns the spectral distortion for this frame.
\*---------------------------------------------------------------------------*/
float lpc_model_amplitudes(
float Sn[], /* Input frame of speech samples */
float w[],
MODEL *model, /* sinusoidal model parameters */
int order, /* LPC model order */
int lsp_quant, /* optional LSP quantisation if non-zero */
float ak[] /* output aks */
)
{
float Wn[M];
float R[LPC_MAX+1];
float E;
int i,j;
float snr;
float lsp[LPC_MAX];
float lsp_hz[LPC_MAX];
float lsp_[LPC_MAX];
int roots; /* number of LSP roots found */
int index;
float se;
int k,m;
float *cb;
float wt[LPC_MAX];
for(i=0; i<M; i++)
Wn[i] = Sn[i]*w[i];
autocorrelate(Wn,R,M,order);
levinson_durbin(R,ak,order);
E = 0.0;
for(i=0; i<=order; i++)
E += ak[i]*R[i];
for(i=0; i<order; i++)
wt[i] = 1.0;
if (lsp_quant) {
roots = lpc_to_lsp(ak, order, lsp, 5, LSP_DELTA1);
if (roots != order)
printf("LSP roots not found\n");
/* convert from radians to Hz to make quantisers more
human readable */
for(i=0; i<order; i++)
lsp_hz[i] = (4000.0/PI)*lsp[i];
/* simple uniform scalar quantisers */
for(i=0; i<10; i++) {
k = lsp_q[i].k;
m = lsp_q[i].m;
cb = plsp_cb[i];
index = quantise(cb, &lsp_hz[i], wt, k, m, &se);
lsp_hz[i] = cb[index*k];
}
/* experiment: simulating uniform quantisation error
for(i=0; i<order; i++)
lsp[i] += PI*(12.5/4000.0)*(1.0 - 2.0*(float)rand()/RAND_MAX);
*/
for(i=0; i<order; i++)
lsp[i] = (PI/4000.0)*lsp_hz[i];
/* Bandwidth Expansion (BW). Prevents any two LSPs getting too
close together after quantisation. We know from experiment
that LSP quantisation errors < 12.5Hz (25Hz setp size) are
inaudible so we use that as the minimum LSP separation.
*/
for(i=1; i<5; i++) {
if (lsp[i] - lsp[i-1] < PI*(12.5/4000.0))
lsp[i] = lsp[i-1] + PI*(12.5/4000.0);
}
/* as quantiser gaps increased, larger BW expansion was required
to prevent twinkly noises */
for(i=5; i<8; i++) {
if (lsp[i] - lsp[i-1] < PI*(25.0/4000.0))
lsp[i] = lsp[i-1] + PI*(25.0/4000.0);
}
for(i=8; i<order; i++) {
if (lsp[i] - lsp[i-1] < PI*(75.0/4000.0))
lsp[i] = lsp[i-1] + PI*(75.0/4000.0);
}
for(j=0; j<order; j++)
lsp_[j] = lsp[j];
lsp_to_lpc(lsp_, ak, order);
dump_lsp(lsp);
}
dump_E(E);
#ifdef SIM_QUANT
/* simulated LPC energy quantisation */
{
float e = 10.0*log10(E);
e += 2.0*(1.0 - 2.0*(float)rand()/RAND_MAX);
E = pow(10.0,e/10.0);
}
#endif
aks_to_M2(ak,order,model,E,&snr, 1); /* {ak} -> {Am} LPC decode */
return snr;
}
/*---------------------------------------------------------------------------*\
aks_to_M2()
Transforms the linear prediction coefficients to spectral amplitude
samples. This function determines A(m) from the average energy per
band using an FFT.
\*---------------------------------------------------------------------------*/
void aks_to_M2(
float ak[], /* LPC's */
int order,
MODEL *model, /* sinusoidal model parameters for this frame */
float E, /* energy term */
float *snr, /* signal to noise ratio for this frame in dB */
int dump /* true to dump sample to dump file */
)
{
COMP Pw[FFT_DEC]; /* power spectrum */
int i,m; /* loop variables */
int am,bm; /* limits of current band */
float r; /* no. rads/bin */
float Em; /* energy in band */
float Am; /* spectral amplitude sample */
float signal, noise;
r = TWO_PI/(FFT_DEC);
/* Determine DFT of A(exp(jw)) --------------------------------------------*/
for(i=0; i<FFT_DEC; i++) {
Pw[i].real = 0.0;
Pw[i].imag = 0.0;
}
for(i=0; i<=order; i++)
Pw[i].real = ak[i];
four1(&Pw[-1].imag,FFT_DEC,1);
/* Determine power spectrum P(w) = E/(A(exp(jw))^2 ------------------------*/
for(i=0; i<FFT_DEC/2; i++)
Pw[i].real = E/(Pw[i].real*Pw[i].real + Pw[i].imag*Pw[i].imag);
if (dump)
dump_Pw(Pw);
/* Determine magnitudes by linear interpolation of P(w) -------------------*/
signal = noise = 0.0;
for(m=1; m<=model->L; m++) {
am = floor((m - 0.5)*model->Wo/r + 0.5);
bm = floor((m + 0.5)*model->Wo/r + 0.5);
Em = 0.0;
for(i=am; i<bm; i++)
Em += Pw[i].real;
Am = sqrt(Em);
signal += pow(model->A[m],2.0);
noise += pow(model->A[m] - Am,2.0);
model->A[m] = Am;
}
*snr = 10.0*log10(signal/noise);
}
/*---------------------------------------------------------------------------*\
FUNCTION....: encode_Wo()
AUTHOR......: David Rowe
DATE CREATED: 22/8/2010
Encodes Wo using a WO_LEVELS quantiser.
\*---------------------------------------------------------------------------*/
int encode_Wo(float Wo)
{
int index;
float Wo_min = TWO_PI/P_MAX;
float Wo_max = TWO_PI/P_MIN;
float norm;
norm = (Wo - Wo_min)/(Wo_max - Wo_min);
index = floor(WO_LEVELS * norm + 0.5);
if (index < 0 ) index = 0;
if (index > (WO_LEVELS-1)) index = WO_LEVELS-1;
return index;
}
/*---------------------------------------------------------------------------*\
FUNCTION....: decode_Wo()
AUTHOR......: David Rowe
DATE CREATED: 22/8/2010
Decodes Wo using a WO_LEVELS quantiser.
\*---------------------------------------------------------------------------*/
float decode_Wo(int index)
{
float Wo_min = TWO_PI/P_MAX;
float Wo_max = TWO_PI/P_MIN;
float step;
float Wo;
step = (Wo_max - Wo_min)/WO_LEVELS;
Wo = Wo_min + step*(index);
return Wo;
}
/*---------------------------------------------------------------------------*\
FUNCTION....: speech_to_uq_lsps()
AUTHOR......: David Rowe
DATE CREATED: 22/8/2010
Analyse a windowed frame of time domain speech to determine LPCs
which are the converted to LSPs for quantisation and transmission
over the channel.
\*---------------------------------------------------------------------------*/
float speech_to_uq_lsps(float lsp[],
float ak[],
float Sn[],
float w[],
int order
)
{
int i, roots;
float Wn[M];
float R[LPC_MAX+1];
float E;
for(i=0; i<M; i++)
Wn[i] = Sn[i]*w[i];
autocorrelate(Wn, R, M, order);
levinson_durbin(R, ak, order);
E = 0.0;
for(i=0; i<=order; i++)
E += ak[i]*R[i];
roots = lpc_to_lsp(ak, order, lsp, 5, LSP_DELTA1);
// assert(roots == order);
return E;
}
/*---------------------------------------------------------------------------*\
FUNCTION....: encode_lsps()
AUTHOR......: David Rowe
DATE CREATED: 22/8/2010
From a vector of unquantised (floating point) LSPs finds the quantised
LSP indexes.
\*---------------------------------------------------------------------------*/
void encode_lsps(int indexes[], float lsp[], int order)
{
int i,k,m;
float wt[1];
float lsp_hz[LPC_MAX];
float *cb, se;
/* convert from radians to Hz so we can use human readable
frequencies */
for(i=0; i<order; i++)
lsp_hz[i] = (4000.0/PI)*lsp[i];
/* simple uniform scalar quantisers */
wt[0] = 1.0;
for(i=0; i<order; i++) {
k = lsp_q[i].k;
m = lsp_q[i].m;
cb = plsp_cb[i];
indexes[i] = quantise(cb, &lsp_hz[i], wt, k, m, &se);
}
}
/*---------------------------------------------------------------------------*\
FUNCTION....: decode_lsps()
AUTHOR......: David Rowe
DATE CREATED: 22/8/2010
From a vector of quantised LSP indexes, returns the quantised
(floating point) LSPs.
\*---------------------------------------------------------------------------*/
void decode_lsps(float lsp[], int indexes[], int order)
{
int i,k;
float lsp_hz[LPC_MAX];
float *cb;
for(i=0; i<order; i++) {
k = lsp_q[i].k;
cb = plsp_cb[i];
lsp_hz[i] = cb[indexes[i]*k];
}
/* convert back to radians */
for(i=0; i<order; i++)
lsp[i] = (PI/4000.0)*lsp_hz[i];
}
/*---------------------------------------------------------------------------*\
FUNCTION....: bw_expand_lsps()
AUTHOR......: David Rowe
DATE CREATED: 22/8/2010
Applies Bandwidth Expansion (BW) to a vector of LSPs. Prevents any
two LSPs getting too close together after quantisation. We know
from experiment that LSP quantisation errors < 12.5Hz (25Hz setp
size) are inaudible so we use that as the minimum LSP separation.
\*---------------------------------------------------------------------------*/
void bw_expand_lsps(float lsp[],
int order
)
{
int i;
for(i=1; i<5; i++) {
if (lsp[i] - lsp[i-1] < PI*(12.5/4000.0))
lsp[i] = lsp[i-1] + PI*(12.5/4000.0);
}
/* As quantiser gaps increased, larger BW expansion was required
to prevent twinkly noises. This may need more experiment for
different quanstisers.
*/
for(i=5; i<8; i++) {
if (lsp[i] - lsp[i-1] < PI*(25.0/4000.0))
lsp[i] = lsp[i-1] + PI*(25.0/4000.0);
}
for(i=8; i<order; i++) {
if (lsp[i] - lsp[i-1] < PI*(75.0/4000.0))
lsp[i] = lsp[i-1] + PI*(75.0/4000.0);
}
}
/*---------------------------------------------------------------------------*\
FUNCTION....: need_lpc_correction()
AUTHOR......: David Rowe
DATE CREATED: 22/8/2010
Determine if we need LPC correction of first harmonic.
\*---------------------------------------------------------------------------*/
int need_lpc_correction(MODEL *model, float ak[], float E)
{
MODEL tmp;
float snr,E1;
/* Find amplitudes so we can check if we need LPC correction.
TODO: replace call to aks_to_M2() by a single DFT calculation
of E/A(exp(jWo)) to make much more efficient. We only need
A[1].
*/
memcpy(&tmp, model, sizeof(MODEL));
aks_to_M2(ak, LPC_ORD, &tmp, E, &snr, 0);
/*
Attenuate fundamental by 30dB if F0 < 150 Hz and LPC modelling
error for A[1] is larger than 6dB.
LPC modelling often makes big errors on 1st harmonic, for example
when the fundamental has been removed by analog high pass
filtering before sampling. However on unfiltered speech from
high quality sources we would like to keep the fundamental to
maintain the speech quality. So we check the error in A[1] and
attenuate it if the error is large to avoid annoying low
frequency energy after LPC modelling.
This requires a single bit to quantise, on top of the other
spectral magnitude bits (i.e. LSP bits + 1 total).
*/
E1 = fabs(20.0*log10(model->A[1]) - 20.0*log10(tmp.A[1]));
if (E1 > 6.0)
return 1;
else
return 0;
}
/*---------------------------------------------------------------------------*\
FUNCTION....: apply_lpc_correction()
AUTHOR......: David Rowe
DATE CREATED: 22/8/2010
Apply first harmonic LPC correction at decoder.
\*---------------------------------------------------------------------------*/
void apply_lpc_correction(MODEL *model, int lpc_correction)
{
if (lpc_correction) {
if (model->Wo < (PI*150.0/4000)) {
model->A[1] *= 0.032;
}
}
}
/*---------------------------------------------------------------------------*\
FUNCTION....: encode_energy()
AUTHOR......: David Rowe
DATE CREATED: 22/8/2010
Encodes LPC energy using an E_LEVELS quantiser.
\*---------------------------------------------------------------------------*/
int encode_energy(float e)
{
int index;
float e_min = E_MIN_DB;
float e_max = E_MAX_DB;
float norm;
e = 10.0*log10(e);
norm = (e - e_min)/(e_max - e_min);
index = floor(E_LEVELS * norm + 0.5);
if (index < 0 ) index = 0;
if (index > (E_LEVELS-1)) index = E_LEVELS-1;
return index;
}
/*---------------------------------------------------------------------------*\
FUNCTION....: decode_energy()
AUTHOR......: David Rowe
DATE CREATED: 22/8/2010
Decodes energy using a WO_BITS quantiser.
\*---------------------------------------------------------------------------*/
float decode_energy(int index)
{
float e_min = E_MIN_DB;
float e_max = E_MAX_DB;
float step;
float e;
step = (e_max - e_min)/E_LEVELS;
e = e_min + step*(index);
e = pow(10.0,e/10.0);
return e;
}
/*---------------------------------------------------------------------------*\
FUNCTION....: encode_amplitudes()
AUTHOR......: David Rowe
DATE CREATED: 22/8/2010
Time domain LPC is used model the amplitudes which are then
converted to LSPs and quantised. So we don't actually encode the
amplitudes directly, rather we derive an equivalent representation
from the time domain speech.
\*---------------------------------------------------------------------------*/
void encode_amplitudes(int lsp_indexes[],
int *lpc_correction,
int *energy_index,
MODEL *model,
float Sn[],
float w[])
{
float lsps[LPC_ORD];
float ak[LPC_ORD+1];
float e;
e = speech_to_uq_lsps(lsps, ak, Sn, w, LPC_ORD);
encode_lsps(lsp_indexes, lsps, LPC_ORD);
*lpc_correction = need_lpc_correction(model, ak, e);
*energy_index = encode_energy(e);
}
/*---------------------------------------------------------------------------*\
FUNCTION....: decode_amplitudes()
AUTHOR......: David Rowe
DATE CREATED: 22/8/2010
Given the amplitude quantiser indexes recovers the harmonic
amplitudes.
\*---------------------------------------------------------------------------*/
float decode_amplitudes(MODEL *model,
float ak[],
int lsp_indexes[],
int lpc_correction,
int energy_index
)
{
float lsps[LPC_ORD];
float e;
float snr;
decode_lsps(lsps, lsp_indexes, LPC_ORD);
bw_expand_lsps(lsps, LPC_ORD);
lsp_to_lpc(lsps, ak, LPC_ORD);
e = decode_energy(energy_index);
aks_to_M2(ak, LPC_ORD, model, e, &snr, 1);
apply_lpc_correction(model, lpc_correction);
return snr;
}