84 lines
2.6 KiB
C
84 lines
2.6 KiB
C
/*
|
|
* SSE type conversions
|
|
* Copyright (C) 2013 Thomas Tsou <tom@tsou.cc>
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#include <malloc.h>
|
|
#include <string.h>
|
|
#include "convert.h"
|
|
#include "convert_sse_3.h"
|
|
#include "convert_sse_4_1.h"
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config.h"
|
|
#endif
|
|
|
|
/* Architecture dependant function pointers */
|
|
struct convert_cpu_context {
|
|
void (*convert_si16_ps_16n) (float *, const short *, int);
|
|
void (*convert_si16_ps) (float *, const short *, int);
|
|
void (*convert_scale_ps_si16_16n)(short *, const float *, float, int);
|
|
void (*convert_scale_ps_si16_8n)(short *, const float *, float, int);
|
|
void (*convert_scale_ps_si16)(short *, const float *, float, int);
|
|
};
|
|
|
|
static struct convert_cpu_context c;
|
|
|
|
void convert_init(void)
|
|
{
|
|
c.convert_scale_ps_si16_16n = base_convert_float_short;
|
|
c.convert_scale_ps_si16_8n = base_convert_float_short;
|
|
c.convert_scale_ps_si16 = base_convert_float_short;
|
|
c.convert_si16_ps_16n = base_convert_short_float;
|
|
c.convert_si16_ps = base_convert_short_float;
|
|
|
|
#ifdef HAVE___BUILTIN_CPU_SUPPORTS
|
|
#ifdef HAVE_SSE4_1
|
|
if (__builtin_cpu_supports("sse4.1")) {
|
|
c.convert_si16_ps_16n = &_sse_convert_si16_ps_16n;
|
|
c.convert_si16_ps = &_sse_convert_si16_ps;
|
|
}
|
|
#endif
|
|
|
|
#ifdef HAVE_SSE3
|
|
if (__builtin_cpu_supports("sse3")) {
|
|
c.convert_scale_ps_si16_16n = _sse_convert_scale_ps_si16_16n;
|
|
c.convert_scale_ps_si16_8n = _sse_convert_scale_ps_si16_8n;
|
|
c.convert_scale_ps_si16 = _sse_convert_scale_ps_si16;
|
|
}
|
|
#endif
|
|
#endif
|
|
}
|
|
|
|
void convert_float_short(short *out, const float *in, float scale, int len)
|
|
{
|
|
if (!(len % 16))
|
|
c.convert_scale_ps_si16_16n(out, in, scale, len);
|
|
else if (!(len % 8))
|
|
c.convert_scale_ps_si16_8n(out, in, scale, len);
|
|
else
|
|
c.convert_scale_ps_si16(out, in, scale, len);
|
|
}
|
|
|
|
void convert_short_float(float *out, const short *in, int len)
|
|
{
|
|
if (!(len % 16))
|
|
c.convert_si16_ps_16n(out, in, len);
|
|
else
|
|
c.convert_si16_ps(out, in, len);
|
|
}
|