/* * SSE Convolution * Copyright (C) 2012, 2013 Thomas Tsou * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. */ #include #include #include #include "convolve.h" #include "convolve_sse_3.h" #ifdef HAVE_CONFIG_H #include "config.h" #endif /* Architecture dependent function pointers */ struct convolve_cpu_context { void (*conv_cmplx_4n) (const float *, int, const float *, int, float *, int, int, int); void (*conv_cmplx_8n) (const float *, int, const float *, int, float *, int, int, int); void (*conv_cmplx) (const float *, int, const float *, int, float *, int, int, int); void (*conv_real4) (const float *, int, const float *, int, float *, int, int, int); void (*conv_real8) (const float *, int, const float *, int, float *, int, int, int); void (*conv_real12) (const float *, int, const float *, int, float *, int, int, int); void (*conv_real16) (const float *, int, const float *, int, float *, int, int, int); void (*conv_real20) (const float *, int, const float *, int, float *, int, int, int); void (*conv_real4n) (const float *, int, const float *, int, float *, int, int, int); void (*conv_real) (const float *, int, const float *, int, float *, int, int, int); }; static struct convolve_cpu_context c; /* Forward declarations from base implementation */ int _base_convolve_real(const float *x, int x_len, const float *h, int h_len, float *y, int y_len, int start, int len); int _base_convolve_complex(const float *x, int x_len, const float *h, int h_len, float *y, int y_len, int start, int len); int bounds_check(int x_len, int h_len, int y_len, int start, int len); /* API: Initialize convolve module */ void convolve_init(void) { c.conv_cmplx_4n = (void *)_base_convolve_complex; c.conv_cmplx_8n = (void *)_base_convolve_complex; c.conv_cmplx = (void *)_base_convolve_complex; c.conv_real4 = (void *)_base_convolve_real; c.conv_real8 = (void *)_base_convolve_real; c.conv_real12 = (void *)_base_convolve_real; c.conv_real16 = (void *)_base_convolve_real; c.conv_real20 = (void *)_base_convolve_real; c.conv_real4n = (void *)_base_convolve_real; c.conv_real = (void *)_base_convolve_real; #if defined(HAVE_SSE3) && defined(HAVE___BUILTIN_CPU_SUPPORTS) if (__builtin_cpu_supports("sse3")) { c.conv_cmplx_4n = sse_conv_cmplx_4n; c.conv_cmplx_8n = sse_conv_cmplx_8n; c.conv_real4 = sse_conv_real4; c.conv_real8 = sse_conv_real8; c.conv_real12 = sse_conv_real12; c.conv_real16 = sse_conv_real16; c.conv_real20 = sse_conv_real20; c.conv_real4n = sse_conv_real4n; } #endif } /* API: Aligned complex-real */ int convolve_real(const float *x, int x_len, const float *h, int h_len, float *y, int y_len, int start, int len) { #ifndef __OPTIMIZE__ if (bounds_check(x_len, h_len, y_len, start, len) < 0) return -1; #endif memset(y, 0, len * 2 * sizeof(float)); switch (h_len) { case 4: c.conv_real4(x, x_len, h, h_len, y, y_len, start, len); break; case 8: c.conv_real8(x, x_len, h, h_len, y, y_len, start, len); break; case 12: c.conv_real12(x, x_len, h, h_len, y, y_len, start, len); break; case 16: c.conv_real16(x, x_len, h, h_len, y, y_len, start, len); break; case 20: c.conv_real20(x, x_len, h, h_len, y, y_len, start, len); break; default: if (!(h_len % 4)) c.conv_real4n(x, x_len, h, h_len, y, y_len, start, len); else c.conv_real(x, x_len, h, h_len, y, y_len, start, len); } return len; } /* API: Aligned complex-complex */ int convolve_complex(const float *x, int x_len, const float *h, int h_len, float *y, int y_len, int start, int len) { #ifndef __OPTIMIZE__ if (bounds_check(x_len, h_len, y_len, start, len) < 0) return -1; #endif memset(y, 0, len * 2 * sizeof(float)); if (!(h_len % 8)) c.conv_cmplx_8n(x, x_len, h, h_len, y, y_len, start, len); else if (!(h_len % 4)) c.conv_cmplx_4n(x, x_len, h, h_len, y, y_len, start, len); else c.conv_cmplx(x, x_len, h, h_len, y, y_len, start, len); return len; }