osmo-trx/Transceiver52M/arch/x86/convert.c

/*
 * SSE type conversions
 * Copyright (C) 2013 Thomas Tsou <tom@tsou.cc>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <malloc.h>
#include <string.h>
#include "convert.h"
#include "convert_sse_3.h"
#include "convert_sse_4_1.h"

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

/*
 * Architecture dependent function pointers.
 *
 * One slot per conversion variant; convert_init() fills every slot and the
 * public convert_*() wrappers pick a slot based on the length they are given.
 */
struct convert_cpu_context {
	/* short -> float, chosen when len is a multiple of 16 */
	void (*convert_si16_ps_16n)(float *out, const short *in, int len);
	/* short -> float, chosen for every other len */
	void (*convert_si16_ps)(float *out, const short *in, int len);
	/* float -> short with scale argument, chosen when len is a multiple of 16 */
	void (*convert_scale_ps_si16_16n)(short *out, const float *in,
					  float scale, int len);
	/* float -> short with scale argument, chosen when len is a multiple of 8 (but not 16) */
	void (*convert_scale_ps_si16_8n)(short *out, const float *in,
					 float scale, int len);
	/* float -> short with scale argument, chosen for every other len */
	void (*convert_scale_ps_si16)(short *out, const float *in,
				      float scale, int len);
};

/*
 * File-local dispatch table. Populated by convert_init() and read by
 * convert_float_short() / convert_short_float(). As a static object it
 * starts out zeroed, so every pointer is NULL until convert_init() has run.
 */
static struct convert_cpu_context c;

/*
 * Fill the dispatch table with the conversion routines to use.
 *
 * Every slot is first pointed at the generic base_* implementation. When the
 * build provides __builtin_cpu_supports() and the matching HAVE_SSE* macro,
 * the running CPU is probed and the affected slots are overridden:
 *   - SSE4.1 -> the short-to-float slots
 *   - SSE3   -> the float-to-short (scaled) slots
 *
 * The convert_*() wrappers call through the table unconditionally, so this
 * function has to run before any of them is used.
 */
void convert_init(void)
{
	/* Generic fallbacks: always valid, regardless of CPU features */
	c.convert_si16_ps = base_convert_short_float;
	c.convert_si16_ps_16n = base_convert_short_float;
	c.convert_scale_ps_si16 = base_convert_float_short;
	c.convert_scale_ps_si16_8n = base_convert_float_short;
	c.convert_scale_ps_si16_16n = base_convert_float_short;

#if defined(HAVE___BUILTIN_CPU_SUPPORTS) && defined(HAVE_SSE4_1)
	/* Runtime check: only switch if this CPU really has SSE4.1 */
	if (__builtin_cpu_supports("sse4.1")) {
		c.convert_si16_ps = _sse_convert_si16_ps;
		c.convert_si16_ps_16n = _sse_convert_si16_ps_16n;
	}
#endif

#if defined(HAVE___BUILTIN_CPU_SUPPORTS) && defined(HAVE_SSE3)
	/* Runtime check: only switch if this CPU really has SSE3 */
	if (__builtin_cpu_supports("sse3")) {
		c.convert_scale_ps_si16 = _sse_convert_scale_ps_si16;
		c.convert_scale_ps_si16_8n = _sse_convert_scale_ps_si16_8n;
		c.convert_scale_ps_si16_16n = _sse_convert_scale_ps_si16_16n;
	}
#endif
}

/*
 * Float-to-short conversion entry point.
 *
 * Picks the routine from the dispatch table according to len (multiple of 16,
 * multiple of 8, or anything else) and forwards all arguments to it unchanged.
 * convert_init() must have been called beforehand.
 */
void convert_float_short(short *out, const float *in, float scale, int len)
{
	void (*conv)(short *, const float *, float, int);

	if (len % 16 == 0)
		conv = c.convert_scale_ps_si16_16n;
	else if (len % 8 == 0)
		conv = c.convert_scale_ps_si16_8n;
	else
		conv = c.convert_scale_ps_si16;

	conv(out, in, scale, len);
}

/*
 * Short-to-float conversion entry point.
 *
 * Uses the 16n routine from the dispatch table when len is a multiple of 16
 * and the general routine otherwise; arguments are forwarded unchanged.
 * convert_init() must have been called beforehand.
 */
void convert_short_float(float *out, const short *in, int len)
{
	void (*conv)(float *, const short *, int);

	conv = (len % 16 == 0) ? c.convert_si16_ps_16n : c.convert_si16_ps;
	conv(out, in, len);
}
Transceiver52M: Add SSE floating point / integer conversion Convertions are performed in multiples of 4 or 8. All loads are considered unaligned. Signed-off-by: Thomas Tsou <tom@tsou.cc> 2013-08-21 01:24:24 +00:00			`/*`
			`* SSE type conversions`
			`* Copyright (C) 2013 Thomas Tsou <tom@tsou.cc>`
			`*`
			`* This library is free software; you can redistribute it and/or`
			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
			`* version 2.1 of the License, or (at your option) any later version.`
			`*`
			`* This library is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public`
			`* License along with this library; if not, write to the Free Software`
			`* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
			`*/`

			`#include <malloc.h>`
			`#include <string.h>`
Transceiver52M: Separate architecture specific files Move x86 specific files into their own directory as this area is about to get crowded with the addition of ARM support. Signed-off-by: Thomas Tsou <tom@tsou.cc> 2013-10-31 01:24:40 +00:00			`#include "convert.h"`
buildenv: Split up SSE3 and SSE4.1 code Currently we find SSE3 and SSE4.1 code mixed togehter along with generic code in one file. This introduces the risk that the compiler exidantly mixes SSE4.1 instructions into an SSE3, or even worse into a generic code path. This commit splits the SSE3 and SSE4.1 code into separate files and compiles them with the matching target options. Change-Id: I846e190e92f1258cd412d1b2d79b539e204e04b3 2017-03-20 11:08:42 +00:00			`#include "convert_sse_3.h"`
			`#include "convert_sse_4_1.h"`
Transceiver52M: Add SSE floating point / integer conversion Convertions are performed in multiples of 4 or 8. All loads are considered unaligned. Signed-off-by: Thomas Tsou <tom@tsou.cc> 2013-08-21 01:24:24 +00:00
			`#ifdef HAVE_CONFIG_H`
			`#include "config.h"`
			`#endif`

ssedetect: Add runtime CPU detection The current implementation can select the SSE support level during compiletime only. This commit adds functionality to automatically detect and switch the SSE support level and automatically switch the Implementation if the CPU does not support the required SSE level. Change-Id: Iba74f8a6e4e921ff31e4bd9f0c7c881fe547423a 2017-03-15 17:09:35 +00:00			`/* Architecture dependant function pointers */`
			`struct convert_cpu_context {`
			`void (*convert_si16_ps_16n) (float *, const short *, int);`
			`void (*convert_si16_ps) (float *, const short *, int);`
			`void (*convert_scale_ps_si16_16n)(short *, const float *, float, int);`
			`void (*convert_scale_ps_si16_8n)(short *, const float *, float, int);`
			`void (*convert_scale_ps_si16)(short *, const float *, float, int);`
			`};`

			`static struct convert_cpu_context c;`

			`void convert_init(void)`
Transceiver52M: Add SSE floating point / integer conversion Convertions are performed in multiples of 4 or 8. All loads are considered unaligned. Signed-off-by: Thomas Tsou <tom@tsou.cc> 2013-08-21 01:24:24 +00:00			`{`
cosmetic: remove code duplication The ARM and the X86 implementation of the conversion functions share the same, non cpu specific implementation in separate files. This commit removes the code duplication by putting the generic implementation into a convert_base.c, similar to to convolve_base.c Change-Id: Ic8d8534a343e27cde79ddc85be4998ebd0cb6e5c 2017-03-16 13:50:25 +00:00			`c.convert_scale_ps_si16_16n = base_convert_float_short;`
			`c.convert_scale_ps_si16_8n = base_convert_float_short;`
			`c.convert_scale_ps_si16 = base_convert_float_short;`
			`c.convert_si16_ps_16n = base_convert_short_float;`
			`c.convert_si16_ps = base_convert_short_float;`
ssedetect: Add runtime CPU detection The current implementation can select the SSE support level during compiletime only. This commit adds functionality to automatically detect and switch the SSE support level and automatically switch the Implementation if the CPU does not support the required SSE level. Change-Id: Iba74f8a6e4e921ff31e4bd9f0c7c881fe547423a 2017-03-15 17:09:35 +00:00
ssedetect: call __builtin_cpu_supports() only if supported Some compilers don't support the __builtin_cpu_supports built-in, so let's make them able to compile the project anyway. Change-Id: I0c90402d8e4c9f196c54b066ff30891c8de3ad2b 2017-05-19 22:46:51 +00:00			`#ifdef HAVE___BUILTIN_CPU_SUPPORTS`
ssedetect: Add runtime CPU detection The current implementation can select the SSE support level during compiletime only. This commit adds functionality to automatically detect and switch the SSE support level and automatically switch the Implementation if the CPU does not support the required SSE level. Change-Id: Iba74f8a6e4e921ff31e4bd9f0c7c881fe547423a 2017-03-15 17:09:35 +00:00			`#ifdef HAVE_SSE4_1`
			`if (__builtin_cpu_supports("sse4.1")) {`
			`c.convert_si16_ps_16n = &_sse_convert_si16_ps_16n;`
			`c.convert_si16_ps = &_sse_convert_si16_ps;`
			`}`
			`#endif`
cosmetic: Make parameter lists uniform The non-sse implementation and the sse implementation of the convert and convolve functions have different parameter lists. This makes it difficult to use function pointers in order to select the right function depending on the SSE-Level and CPU. This commit uniformizes the parameter lists in preparation for planned runtime cpu detection support Change-Id: Ice063b89791537c4b591751f12f5ef5c413a2d27 2017-03-15 11:39:25 +00:00
Transceiver52M: Add SSE floating point / integer conversion Convertions are performed in multiples of 4 or 8. All loads are considered unaligned. Signed-off-by: Thomas Tsou <tom@tsou.cc> 2013-08-21 01:24:24 +00:00			`#ifdef HAVE_SSE3`
ssedetect: Add runtime CPU detection The current implementation can select the SSE support level during compiletime only. This commit adds functionality to automatically detect and switch the SSE support level and automatically switch the Implementation if the CPU does not support the required SSE level. Change-Id: Iba74f8a6e4e921ff31e4bd9f0c7c881fe547423a 2017-03-15 17:09:35 +00:00			`if (__builtin_cpu_supports("sse3")) {`
			`c.convert_scale_ps_si16_16n = _sse_convert_scale_ps_si16_16n;`
			`c.convert_scale_ps_si16_8n = _sse_convert_scale_ps_si16_8n;`
			`c.convert_scale_ps_si16 = _sse_convert_scale_ps_si16;`
			`}`
			`#endif`
ssedetect: call __builtin_cpu_supports() only if supported Some compilers don't support the __builtin_cpu_supports built-in, so let's make them able to compile the project anyway. Change-Id: I0c90402d8e4c9f196c54b066ff30891c8de3ad2b 2017-05-19 22:46:51 +00:00			`#endif`
ssedetect: Add runtime CPU detection The current implementation can select the SSE support level during compiletime only. This commit adds functionality to automatically detect and switch the SSE support level and automatically switch the Implementation if the CPU does not support the required SSE level. Change-Id: Iba74f8a6e4e921ff31e4bd9f0c7c881fe547423a 2017-03-15 17:09:35 +00:00			`}`

			`void convert_float_short(short *out, const float *in, float scale, int len)`
			`{`
Transceiver52M: Add SSE floating point / integer conversion Convertions are performed in multiples of 4 or 8. All loads are considered unaligned. Signed-off-by: Thomas Tsou <tom@tsou.cc> 2013-08-21 01:24:24 +00:00			`if (!(len % 16))`
ssedetect: Add runtime CPU detection The current implementation can select the SSE support level during compiletime only. This commit adds functionality to automatically detect and switch the SSE support level and automatically switch the Implementation if the CPU does not support the required SSE level. Change-Id: Iba74f8a6e4e921ff31e4bd9f0c7c881fe547423a 2017-03-15 17:09:35 +00:00			`c.convert_scale_ps_si16_16n(out, in, scale, len);`
Transceiver52M: Add SSE floating point / integer conversion Convertions are performed in multiples of 4 or 8. All loads are considered unaligned. Signed-off-by: Thomas Tsou <tom@tsou.cc> 2013-08-21 01:24:24 +00:00			`else if (!(len % 8))`
ssedetect: Add runtime CPU detection The current implementation can select the SSE support level during compiletime only. This commit adds functionality to automatically detect and switch the SSE support level and automatically switch the Implementation if the CPU does not support the required SSE level. Change-Id: Iba74f8a6e4e921ff31e4bd9f0c7c881fe547423a 2017-03-15 17:09:35 +00:00			`c.convert_scale_ps_si16_8n(out, in, scale, len);`
Transceiver52M: Add SSE floating point / integer conversion Convertions are performed in multiples of 4 or 8. All loads are considered unaligned. Signed-off-by: Thomas Tsou <tom@tsou.cc> 2013-08-21 01:24:24 +00:00			`else`
ssedetect: Add runtime CPU detection The current implementation can select the SSE support level during compiletime only. This commit adds functionality to automatically detect and switch the SSE support level and automatically switch the Implementation if the CPU does not support the required SSE level. Change-Id: Iba74f8a6e4e921ff31e4bd9f0c7c881fe547423a 2017-03-15 17:09:35 +00:00			`c.convert_scale_ps_si16(out, in, scale, len);`
Transceiver52M: Add SSE floating point / integer conversion Convertions are performed in multiples of 4 or 8. All loads are considered unaligned. Signed-off-by: Thomas Tsou <tom@tsou.cc> 2013-08-21 01:24:24 +00:00			`}`

sigproc: Make convolution and convert input buffers immutable For good practice, use const specifier when applicable. Signed-off-by: Tom Tsou <tom@tsou.cc> 2015-03-25 19:55:11 +00:00			`void convert_short_float(float *out, const short *in, int len)`
Transceiver52M: Add SSE floating point / integer conversion Convertions are performed in multiples of 4 or 8. All loads are considered unaligned. Signed-off-by: Thomas Tsou <tom@tsou.cc> 2013-08-21 01:24:24 +00:00			`{`
			`if (!(len % 16))`
ssedetect: Add runtime CPU detection The current implementation can select the SSE support level during compiletime only. This commit adds functionality to automatically detect and switch the SSE support level and automatically switch the Implementation if the CPU does not support the required SSE level. Change-Id: Iba74f8a6e4e921ff31e4bd9f0c7c881fe547423a 2017-03-15 17:09:35 +00:00			`c.convert_si16_ps_16n(out, in, len);`
Transceiver52M: Add SSE floating point / integer conversion Convertions are performed in multiples of 4 or 8. All loads are considered unaligned. Signed-off-by: Thomas Tsou <tom@tsou.cc> 2013-08-21 01:24:24 +00:00			`else`
ssedetect: Add runtime CPU detection The current implementation can select the SSE support level during compiletime only. This commit adds functionality to automatically detect and switch the SSE support level and automatically switch the Implementation if the CPU does not support the required SSE level. Change-Id: Iba74f8a6e4e921ff31e4bd9f0c7c881fe547423a 2017-03-15 17:09:35 +00:00			`c.convert_si16_ps(out, in, len);`
Transceiver52M: Add SSE floating point / integer conversion Convertions are performed in multiples of 4 or 8. All loads are considered unaligned. Signed-off-by: Thomas Tsou <tom@tsou.cc> 2013-08-21 01:24:24 +00:00			`}`