2013-08-20 23:31:14 +00:00
|
|
|
/*
|
|
|
|
* SSE Convolution
|
|
|
|
* Copyright (C) 2012, 2013 Thomas Tsou <tom@tsou.cc>
|
|
|
|
*
|
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <malloc.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <stdio.h>
|
2013-10-31 01:24:40 +00:00
|
|
|
#include "convolve.h"
|
2017-03-20 11:08:42 +00:00
|
|
|
#include "convolve_sse_3.h"
|
2013-08-20 23:31:14 +00:00
|
|
|
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
|
|
#include "config.h"
|
|
|
|
#endif
|
|
|
|
|
2019-10-13 17:08:00 +00:00
|
|
|
/*
 * Architecture dependent function pointers
 *
 * Every slot shares one signature; at the call sites in this file the
 * arguments are passed as (x, x_len, h, h_len, y, y_len, start, len).
 */
typedef void (*conv_kernel_fn)(const float *, int, const float *, int,
			       float *, int, int, int);

struct convolve_cpu_context {
	/* Slots selected by convolve_complex() */
	conv_kernel_fn conv_cmplx_4n;	/* h_len multiple of 4 (but not of 8) */
	conv_kernel_fn conv_cmplx_8n;	/* h_len multiple of 8 */
	conv_kernel_fn conv_cmplx;	/* any other h_len */

	/* Slots selected by convolve_real() */
	conv_kernel_fn conv_real4;	/* h_len == 4 */
	conv_kernel_fn conv_real8;	/* h_len == 8 */
	conv_kernel_fn conv_real12;	/* h_len == 12 */
	conv_kernel_fn conv_real16;	/* h_len == 16 */
	conv_kernel_fn conv_real20;	/* h_len == 20 */
	conv_kernel_fn conv_real4n;	/* other h_len that is a multiple of 4 */
	conv_kernel_fn conv_real;	/* any other h_len */
};
|
|
|
|
/*
 * File-local dispatch table. convolve_init() fills in every slot and
 * convolve_real()/convolve_complex() call through it. As a static object it
 * starts out zero-initialised, so the slots are null pointers until
 * convolve_init() has run.
 */
static struct convolve_cpu_context c;
|
|
|
|
|
2013-10-31 01:24:40 +00:00
|
|
|
/*
 * Forward declarations from base implementation
 *
 * _base_convolve_real() is installed by convolve_init() as the default
 * handler for all conv_real* dispatch slots.
 */
int _base_convolve_real(const float *x, int x_len, const float *h, int h_len,
			float *y, int y_len, int start, int len);
|
2013-10-31 01:24:40 +00:00
|
|
|
|
2015-03-25 19:55:11 +00:00
|
|
|
/*
 * Base implementation entry point; convolve_init() installs it as the
 * default handler for all conv_cmplx* dispatch slots.
 */
int _base_convolve_complex(const float *x, int x_len, const float *h, int h_len,
			   float *y, int y_len, int start, int len);
|
2013-10-31 01:24:40 +00:00
|
|
|
|
|
|
|
/*
 * Argument validation helper, defined outside this section. The callers in
 * this file treat a negative return value as "invalid arguments".
 */
int bounds_check(int x_len, int h_len, int y_len, int start, int len);
|
2013-10-31 01:24:40 +00:00
|
|
|
|
2019-10-13 17:08:00 +00:00
|
|
|
/* API: Initialize convolve module */
|
2017-03-15 17:09:35 +00:00
|
|
|
void convolve_init(void)
|
|
|
|
{
|
|
|
|
c.conv_cmplx_4n = (void *)_base_convolve_complex;
|
|
|
|
c.conv_cmplx_8n = (void *)_base_convolve_complex;
|
|
|
|
c.conv_cmplx = (void *)_base_convolve_complex;
|
|
|
|
c.conv_real4 = (void *)_base_convolve_real;
|
|
|
|
c.conv_real8 = (void *)_base_convolve_real;
|
|
|
|
c.conv_real12 = (void *)_base_convolve_real;
|
|
|
|
c.conv_real16 = (void *)_base_convolve_real;
|
|
|
|
c.conv_real20 = (void *)_base_convolve_real;
|
|
|
|
c.conv_real4n = (void *)_base_convolve_real;
|
|
|
|
c.conv_real = (void *)_base_convolve_real;
|
|
|
|
|
2017-05-19 22:46:51 +00:00
|
|
|
#if defined(HAVE_SSE3) && defined(HAVE___BUILTIN_CPU_SUPPORTS)
|
2017-03-15 17:09:35 +00:00
|
|
|
if (__builtin_cpu_supports("sse3")) {
|
|
|
|
c.conv_cmplx_4n = sse_conv_cmplx_4n;
|
|
|
|
c.conv_cmplx_8n = sse_conv_cmplx_8n;
|
|
|
|
c.conv_real4 = sse_conv_real4;
|
|
|
|
c.conv_real8 = sse_conv_real8;
|
|
|
|
c.conv_real12 = sse_conv_real12;
|
|
|
|
c.conv_real16 = sse_conv_real16;
|
|
|
|
c.conv_real20 = sse_conv_real20;
|
|
|
|
c.conv_real4n = sse_conv_real4n;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
2013-08-20 23:31:14 +00:00
|
|
|
/* API: Aligned complex-real */
|
2015-03-25 19:55:11 +00:00
|
|
|
int convolve_real(const float *x, int x_len,
|
|
|
|
const float *h, int h_len,
|
2018-12-20 18:10:26 +00:00
|
|
|
float *y, int y_len, int start, int len)
|
2013-08-20 23:31:14 +00:00
|
|
|
{
|
2020-08-14 01:11:22 +00:00
|
|
|
#ifndef __OPTIMIZE__
|
2018-12-20 18:10:26 +00:00
|
|
|
if (bounds_check(x_len, h_len, y_len, start, len) < 0)
|
2013-08-20 23:31:14 +00:00
|
|
|
return -1;
|
2020-08-14 01:11:22 +00:00
|
|
|
#endif
|
2020-09-18 18:11:14 +00:00
|
|
|
memset(y, 0, len * 2 * sizeof(float));
|
|
|
|
|
2018-12-20 18:10:26 +00:00
|
|
|
switch (h_len) {
|
|
|
|
case 4:
|
|
|
|
c.conv_real4(x, x_len, h, h_len, y, y_len, start, len);
|
|
|
|
break;
|
|
|
|
case 8:
|
|
|
|
c.conv_real8(x, x_len, h, h_len, y, y_len, start, len);
|
|
|
|
break;
|
|
|
|
case 12:
|
|
|
|
c.conv_real12(x, x_len, h, h_len, y, y_len, start, len);
|
|
|
|
break;
|
|
|
|
case 16:
|
|
|
|
c.conv_real16(x, x_len, h, h_len, y, y_len, start, len);
|
|
|
|
break;
|
|
|
|
case 20:
|
|
|
|
c.conv_real20(x, x_len, h, h_len, y, y_len, start, len);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
if (!(h_len % 4))
|
|
|
|
c.conv_real4n(x, x_len, h, h_len, y, y_len,
|
|
|
|
start, len);
|
|
|
|
else
|
|
|
|
c.conv_real(x, x_len, h, h_len, y, y_len, start,
|
|
|
|
len);
|
|
|
|
}
|
2013-08-20 23:31:14 +00:00
|
|
|
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* API: Aligned complex-complex */
|
2015-03-25 19:55:11 +00:00
|
|
|
int convolve_complex(const float *x, int x_len,
|
|
|
|
const float *h, int h_len,
|
2013-08-20 23:31:14 +00:00
|
|
|
float *y, int y_len,
|
2018-12-20 18:10:26 +00:00
|
|
|
int start, int len)
|
2013-08-20 23:31:14 +00:00
|
|
|
{
|
2020-08-14 01:11:22 +00:00
|
|
|
#ifndef __OPTIMIZE__
|
2018-12-20 18:10:26 +00:00
|
|
|
if (bounds_check(x_len, h_len, y_len, start, len) < 0)
|
2013-08-20 23:31:14 +00:00
|
|
|
return -1;
|
2020-08-14 01:11:22 +00:00
|
|
|
#endif
|
2020-09-18 18:11:14 +00:00
|
|
|
memset(y, 0, len * 2 * sizeof(float));
|
|
|
|
|
2018-12-20 18:10:26 +00:00
|
|
|
if (!(h_len % 8))
|
|
|
|
c.conv_cmplx_8n(x, x_len, h, h_len, y, y_len, start, len);
|
|
|
|
else if (!(h_len % 4))
|
|
|
|
c.conv_cmplx_4n(x, x_len, h, h_len, y, y_len, start, len);
|
|
|
|
else
|
|
|
|
c.conv_cmplx(x, x_len, h, h_len, y, y_len, start, len);
|
2013-08-20 23:31:14 +00:00
|
|
|
|
|
|
|
return len;
|
|
|
|
}
|