@ -26,6 +26,31 @@
# include "config.h"
# endif
/* Architecture dependent function pointers */
/*
 * Signature shared by every kernel stored in the dispatch table.
 * Callers in this file pass the arguments in the order
 * (x, x_len, h, h_len, y, y_len, start, len, step, offset).
 */
typedef void (*convolve_fn)(const float *, int, const float *, int, float *,
			    int, int, int, int, int);

/*
 * Per-CPU dispatch table of convolution kernels.
 *
 * One slot exists for each kernel variant the public entry points choose
 * between; the slots are filled in by convolve_init().
 */
struct convolve_cpu_context {
	/* Complex-tap kernels */
	convolve_fn conv_cmplx_4n;
	convolve_fn conv_cmplx_8n;
	convolve_fn conv_cmplx;		/* fallback for any other tap count */

	/* Real-tap kernels for specific tap counts */
	convolve_fn conv_real4;
	convolve_fn conv_real8;
	convolve_fn conv_real12;
	convolve_fn conv_real16;
	convolve_fn conv_real20;

	/* Real-tap kernels for tap counts without a dedicated slot */
	convolve_fn conv_real4n;
	convolve_fn conv_real;		/* fallback for any other tap count */
};

/* The single table instance used by this translation unit. */
static struct convolve_cpu_context c;
/* Forward declarations from base implementation */
int _base_convolve_real ( const float * x , int x_len ,
const float * h , int h_len ,
@ -565,45 +590,77 @@ static void sse_conv_cmplx_8n(const float *x, int x_len,
}
# endif
/* API: Initialize convolve module */
/*
 * Fill the file-scope dispatch table `c`.
 *
 * Every slot is first pointed at the generic base implementation, so the
 * table is fully usable on any CPU. When the build defines HAVE_SSE3 and
 * the running CPU reports SSE3 support, the slots that have an SSE kernel
 * are then overridden. The generic conv_cmplx and conv_real slots have no
 * SSE counterpart here and always keep the base implementation.
 *
 * NOTE(review): the visible forward declaration of _base_convolve_real
 * returns int, while the table slots are pointers to functions returning
 * void; the (void *) casts hide that mismatch. Calling through an
 * incompatible function pointer type is formally undefined in ISO C --
 * confirm this is acceptable for the supported targets.
 */
void convolve_init(void)
{
	/* Portable defaults: complex-tap kernels */
	c.conv_cmplx_4n = (void *)_base_convolve_complex;
	c.conv_cmplx_8n = (void *)_base_convolve_complex;
	c.conv_cmplx = (void *)_base_convolve_complex;

	/* Portable defaults: real-tap kernels */
	c.conv_real4 = (void *)_base_convolve_real;
	c.conv_real8 = (void *)_base_convolve_real;
	c.conv_real12 = (void *)_base_convolve_real;
	c.conv_real16 = (void *)_base_convolve_real;
	c.conv_real20 = (void *)_base_convolve_real;
	c.conv_real4n = (void *)_base_convolve_real;
	c.conv_real = (void *)_base_convolve_real;

#ifdef HAVE_SSE3
	/*
	 * The feature name must be spelled exactly as the compiler expects;
	 * surrounding blanks inside the literal are rejected.
	 */
	if (__builtin_cpu_supports("sse3")) {
		c.conv_cmplx_4n = sse_conv_cmplx_4n;
		c.conv_cmplx_8n = sse_conv_cmplx_8n;
		c.conv_real4 = sse_conv_real4;
		c.conv_real8 = sse_conv_real8;
		c.conv_real12 = sse_conv_real12;
		c.conv_real16 = sse_conv_real16;
		c.conv_real20 = sse_conv_real20;
		c.conv_real4n = sse_conv_real4n;
	}
#endif
}
/* API: Aligned complex-real */
int convolve_real ( const float * x , int x_len ,
const float * h , int h_len ,
float * y , int y_len , int start , int len , int step , int offset )
{
void ( * conv_func ) ( const float * , int , const float * , int , float * , int ,
int , int , int , int ) = ( void * ) _base_convolve_real ;
if ( bounds_check ( x_len , h_len , y_len , start , len , step ) < 0 )
return - 1 ;
memset ( y , 0 , len * 2 * sizeof ( float ) ) ;
# ifdef HAVE_SSE3
if ( step < = 4 ) {
switch ( h_len ) {
case 4 :
conv_func = sse_conv_real4 ;
c . conv_real4 ( x , x_len , h , h_len , y , y_len , start , len ,
step , offset ) ;
break ;
case 8 :
conv_func = sse_conv_real8 ;
c . conv_real8 ( x , x_len , h , h_len , y , y_len , start , len ,
step , offset ) ;
break ;
case 12 :
conv_func = sse_conv_real12 ;
c . conv_real12 ( x , x_len , h , h_len , y , y_len , start , len ,
step , offset ) ;
break ;
case 16 :
conv_func = sse_conv_real16 ;
c . conv_real16 ( x , x_len , h , h_len , y , y_len , start , len ,
step , offset ) ;
break ;
case 20 :
conv_func = sse_conv_real20 ;
c . conv_real20 ( x , x_len , h , h_len , y , y_len , start , len ,
step , offset ) ;
break ;
default :
if ( ! ( h_len % 4 ) )
conv_func = sse_conv_real4n ;
c . conv_real4n ( x , x_len , h , h_len , y , y_len ,
start , len , step , offset ) ;
else
c . conv_real ( x , x_len , h , h_len , y , y_len , start ,
len , step , offset ) ;
}
}
# endif
conv_func ( x , x_len , h , h_len , y , y_len , start , len , step , offset ) ;
} else
c . conv_real ( x , x_len , h , h_len , y , y_len , start , len , step ,
offset ) ;
return len ;
}
@ -614,25 +671,24 @@ int convolve_complex(const float *x, int x_len,
float * y , int y_len ,
int start , int len , int step , int offset )
{
void ( * conv_func ) ( const float * , int , const float * , int , float * , int ,
int , int , int , int ) =
( void * ) _base_convolve_complex ;
if ( bounds_check ( x_len , h_len , y_len , start , len , step ) < 0 )
return - 1 ;
memset ( y , 0 , len * 2 * sizeof ( float ) ) ;
# ifdef HAVE_SSE3
if ( step < = 4 ) {
if ( ! ( h_len % 8 ) )
conv_func = sse_conv_cmplx_8n ;
c . conv_cmplx_8n ( x , x_len , h , h_len , y , y_len , start ,
len , step , offset ) ;
else if ( ! ( h_len % 4 ) )
conv_func = sse_conv_cmplx_4n ;
}
# endif
conv_func ( x , x_len , h , h_len , y , y_len , start , len , step , offset ) ;
c . conv_cmplx_4n ( x , x_len , h , h_len , y , y_len , start ,
len , step , offset ) ;
else
c . conv_cmplx ( x , x_len , h , h_len , y , y_len , start , len ,
step , offset ) ;
} else
c . conv_cmplx ( x , x_len , h , h_len , y , y_len , start , len , step ,
offset ) ;
return len ;
}