diff --git a/Transceiver52M/Makefile.am b/Transceiver52M/Makefile.am index d002b041..c03116e9 100644 --- a/Transceiver52M/Makefile.am +++ b/Transceiver52M/Makefile.am @@ -21,10 +21,13 @@ include $(top_srcdir)/Makefile.common -AM_CFLAGS = $(STD_DEFINES_AND_INCLUDES) -std=gnu99 -march=native -AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) +AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) -I./common AM_CXXFLAGS = -ldl -lpthread +SUBDIRS = x86 + +ARCH_LA = x86/libarch.la + #UHD wins if both are defined if UHD AM_CPPFLAGS += $(UHD_CFLAGS) @@ -52,9 +55,7 @@ COMMON_SOURCES = \ radioClock.cpp \ sigProcLib.cpp \ Transceiver.cpp \ - DummyLoad.cpp \ - convolve.c \ - convert.c + DummyLoad.cpp libtransceiver_la_SOURCES = \ $(COMMON_SOURCES) \ @@ -79,8 +80,8 @@ noinst_HEADERS = \ rcvLPF_651.h \ sendLPF_961.h \ Resampler.h \ - convolve.h \ - convert.h + common/convolve.h \ + common/convert.h USRPping_SOURCES = USRPping.cpp USRPping_LDADD = \ @@ -90,12 +91,14 @@ USRPping_LDADD = \ transceiver_SOURCES = runTransceiver.cpp transceiver_LDADD = \ libtransceiver.la \ + $(ARCH_LA) \ $(GSM_LA) \ $(COMMON_LA) $(SQLITE_LA) sigProcLibTest_SOURCES = sigProcLibTest.cpp sigProcLibTest_LDADD = \ libtransceiver.la \ + $(ARCH_LA) \ $(GSM_LA) \ $(COMMON_LA) $(SQLITE_LA) diff --git a/Transceiver52M/convert.h b/Transceiver52M/common/convert.h similarity index 100% rename from Transceiver52M/convert.h rename to Transceiver52M/common/convert.h diff --git a/Transceiver52M/convolve.h b/Transceiver52M/common/convolve.h similarity index 100% rename from Transceiver52M/convolve.h rename to Transceiver52M/common/convolve.h diff --git a/Transceiver52M/common/convolve_base.c b/Transceiver52M/common/convolve_base.c new file mode 100644 index 00000000..41dba1c3 --- /dev/null +++ b/Transceiver52M/common/convolve_base.c @@ -0,0 +1,156 @@ +/* + * Convolution + * Copyright (C) 2012, 2013 Thomas Tsou + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General 
Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +/* Base multiply and accumulate complex-real: y += x * h[0], with x and y as (re, im) float pairs; h[1] is not read */ +static void mac_real(float *x, float *h, float *y) +{ + y[0] += x[0] * h[0]; + y[1] += x[1] * h[0]; +} + +/* Base multiply and accumulate complex-complex: y += x * h, with x, h and y as (re, im) float pairs */ +static void mac_cmplx(float *x, float *h, float *y) +{ + y[0] += x[0] * h[0] - x[1] * h[1]; + y[1] += x[0] * h[1] + x[1] * h[0]; +} + +/* Base vector complex-real multiply and accumulate: sums taps i = offset, offset + step, ... (i < len) into the single pair y */ +static void mac_real_vec_n(float *x, float *h, float *y, + int len, int step, int offset) +{ + for (int i = offset; i < len; i += step) + mac_real(&x[2 * i], &h[2 * i], y); +} + +/* Base vector complex-complex multiply and accumulate: sums taps i = offset, offset + step, ... (i < len) into the single pair y */ +static void mac_cmplx_vec_n(float *x, float *h, float *y, + int len, int step, int offset) +{ + for (int i = offset; i < len; i += step) + mac_cmplx(&x[2 * i], &h[2 * i], y); +} + +/* Base complex-real convolution: accumulates into y without clearing it; the window for output i begins h_len - 1 samples before x index i + start; x_len and y_len are unused; returns len */ +int _base_convolve_real(float *x, int x_len, + float *h, int h_len, + float *y, int y_len, + int start, int len, + int step, int offset) +{ + for (int i = 0; i < len; i++) { + mac_real_vec_n(&x[2 * (i - (h_len - 1) + start)], + h, + &y[2 * i], h_len, + step, offset); + } + + return len; +} + +/* Base complex-complex convolution: accumulates into y without clearing it; the window for output i begins h_len - 1 samples before x index i + start; x_len and y_len are unused; returns len */ +int _base_convolve_complex(float *x, int x_len, + float *h, int h_len, + float *y, int 
y_len, + int start, int len, + int step, int offset) +{ + for (int i = 0; i < len; i++) { + mac_cmplx_vec_n(&x[2 * (i - (h_len - 1) + start)], + h, + &y[2 * i], + h_len, step, offset); + } + + return len; +} + +/* Buffer validity checks: returns 0 when the lengths pass, otherwise prints a message to stderr and returns -1 */ +int bounds_check(int x_len, int h_len, int y_len, + int start, int len, int step) +{ + if ((x_len < 1) || (h_len < 1) || + (y_len < 1) || (len < 1) || (step < 1)) { + fprintf(stderr, "Convolve: Invalid input\n"); + return -1; + } + + if ((start + len > x_len) || (len > y_len) || (x_len < h_len)) { + fprintf(stderr, "Convolve: Boundary exception\n"); + fprintf(stderr, "start: %i, len: %i, x: %i, h: %i, y: %i\n", + start, len, x_len, h_len, y_len); + return -1; + } + + return 0; +} + +/* API: Non-aligned (no SSE) complex-real; returns -1 if bounds_check() fails, otherwise zeroes len complex outputs in y, convolves and returns len */ +int base_convolve_real(float *x, int x_len, + float *h, int h_len, + float *y, int y_len, + int start, int len, + int step, int offset) +{ + if (bounds_check(x_len, h_len, y_len, start, len, step) < 0) + return -1; + + memset(y, 0, len * 2 * sizeof(float)); + + return _base_convolve_real(x, x_len, + h, h_len, + y, y_len, + start, len, step, offset); +} + +/* API: Non-aligned (no SSE) complex-complex; returns -1 if bounds_check() fails, otherwise zeroes len complex outputs in y, convolves and returns len */ +int base_convolve_complex(float *x, int x_len, + float *h, int h_len, + float *y, int y_len, + int start, int len, + int step, int offset) +{ + if (bounds_check(x_len, h_len, y_len, start, len, step) < 0) + return -1; + + memset(y, 0, len * 2 * sizeof(float)); + + return _base_convolve_complex(x, x_len, + h, h_len, + y, y_len, + start, len, step, offset); +} + +/* Filter tap allocation for len complex taps (2 floats each): 16-byte aligned via memalign() when HAVE_SSE3 is defined, plain malloc() otherwise; the result is not checked here */ +void *convolve_h_alloc(int len) +{ +#ifdef HAVE_SSE3 + return memalign(16, len * 2 * sizeof(float)); +#else + return malloc(len * 2 * sizeof(float)); +#endif +} diff --git a/Transceiver52M/x86/Makefile.am b/Transceiver52M/x86/Makefile.am new file mode 100644 index 00000000..0621b173 --- /dev/null +++ b/Transceiver52M/x86/Makefile.am @@ -0,0 +1,8 @@ +AM_CFLAGS = -Wall -std=gnu99 -march=native -I../common + 
+noinst_LTLIBRARIES = libarch.la + +libarch_la_SOURCES = \ + ../common/convolve_base.c \ + convert.c \ + convolve.c diff --git a/Transceiver52M/convert.c b/Transceiver52M/x86/convert.c similarity index 99% rename from Transceiver52M/convert.c rename to Transceiver52M/x86/convert.c index dc5e748d..1d2f2087 100644 --- a/Transceiver52M/convert.c +++ b/Transceiver52M/x86/convert.c @@ -19,6 +19,7 @@ #include #include +#include "convert.h" #ifdef HAVE_CONFIG_H #include "config.h" @@ -164,7 +165,7 @@ static void convert_scale_ps_si16(short *out, float *in, float scale, int len) } #endif -#ifndef HAVE_SSE_4_1 +#ifndef HAVE_SSE3 static void convert_si16_ps(float *out, short *in, int len) { for (int i = 0; i < len; i++) diff --git a/Transceiver52M/convolve.c b/Transceiver52M/x86/convolve.c similarity index 83% rename from Transceiver52M/convolve.c rename to Transceiver52M/x86/convolve.c index 6f48ea0c..ed85d974 100644 --- a/Transceiver52M/convolve.c +++ b/Transceiver52M/x86/convolve.c @@ -20,11 +20,28 @@ #include #include #include +#include "convolve.h" #ifdef HAVE_CONFIG_H #include "config.h" #endif +/* Forward declarations from base implementation (defined in common/convolve_base.c) */ +int _base_convolve_real(float *x, int x_len, + float *h, int h_len, + float *y, int y_len, + int start, int len, + int step, int offset); + +int _base_convolve_complex(float *x, int x_len, + float *h, int h_len, + float *y, int y_len, + int start, int len, + int step, int offset); + +int bounds_check(int x_len, int h_len, int y_len, + int start, int len, int step); + #ifdef HAVE_SSE3 #include #include @@ -493,90 +510,6 @@ static void sse_conv_cmplx_8n(float *x, float *h, float *y, int h_len, int len) } #endif -/* Base multiply and accumulate complex-real */ -static void mac_real(float *x, float *h, float *y) -{ - y[0] += x[0] * h[0]; - y[1] += x[1] * h[0]; -} - -/* Base multiply and accumulate complex-complex */ -static void mac_cmplx(float *x, float *h, float *y) -{ - y[0] += x[0] * h[0] - x[1] * h[1]; - y[1] += x[0] * h[1] 
+ x[1] * h[0]; -} - -/* Base vector complex-complex multiply and accumulate */ -static void mac_real_vec_n(float *x, float *h, float *y, - int len, int step, int offset) -{ - for (int i = offset; i < len; i += step) - mac_real(&x[2 * i], &h[2 * i], y); -} - -/* Base vector complex-complex multiply and accumulate */ -static void mac_cmplx_vec_n(float *x, float *h, float *y, - int len, int step, int offset) -{ - for (int i = offset; i < len; i += step) - mac_cmplx(&x[2 * i], &h[2 * i], y); -} - -/* Base complex-real convolution */ -static int _base_convolve_real(float *x, int x_len, - float *h, int h_len, - float *y, int y_len, - int start, int len, - int step, int offset) -{ - for (int i = 0; i < len; i++) { - mac_real_vec_n(&x[2 * (i - (h_len - 1) + start)], - h, - &y[2 * i], h_len, - step, offset); - } - - return len; -} - -/* Base complex-complex convolution */ -static int _base_convolve_complex(float *x, int x_len, - float *h, int h_len, - float *y, int y_len, - int start, int len, - int step, int offset) -{ - for (int i = 0; i < len; i++) { - mac_cmplx_vec_n(&x[2 * (i - (h_len - 1) + start)], - h, - &y[2 * i], - h_len, step, offset); - } - - return len; -} - -/* Buffer validity checks */ -static int bounds_check(int x_len, int h_len, int y_len, - int start, int len, int step) -{ - if ((x_len < 1) || (h_len < 1) || - (y_len < 1) || (len < 1) || (step < 1)) { - fprintf(stderr, "Convolve: Invalid input\n"); - return -1; - } - - if ((start + len > x_len) || (len > y_len) || (x_len < h_len)) { - fprintf(stderr, "Convolve: Boundary exception\n"); - fprintf(stderr, "start: %i, len: %i, x: %i, h: %i, y: %i\n", - start, len, x_len, h_len, y_len); - return -1; - } - - return 0; -} - /* API: Aligned complex-real */ int convolve_real(float *x, int x_len, float *h, int h_len, @@ -666,49 +599,3 @@ int convolve_complex(float *x, int x_len, return len; } - -/* API: Non-aligned (no SSE) complex-real */ -int base_convolve_real(float *x, int x_len, - float *h, int h_len, - float 
*y, int y_len, - int start, int len, - int step, int offset) -{ - if (bounds_check(x_len, h_len, y_len, start, len, step) < 0) - return -1; - - memset(y, 0, len * 2 * sizeof(float)); - - return _base_convolve_real(x, x_len, - h, h_len, - y, y_len, - start, len, step, offset); -} - -/* API: Non-aligned (no SSE) complex-complex */ -int base_convolve_complex(float *x, int x_len, - float *h, int h_len, - float *y, int y_len, - int start, int len, - int step, int offset) -{ - if (bounds_check(x_len, h_len, y_len, start, len, step) < 0) - return -1; - - memset(y, 0, len * 2 * sizeof(float)); - - return _base_convolve_complex(x, x_len, - h, h_len, - y, y_len, - start, len, step, offset); -} - -/* Aligned filter tap allocation */ -void *convolve_h_alloc(int len) -{ -#ifdef HAVE_SSE3 - return memalign(16, len * 2 * sizeof(float)); -#else - return malloc(len * 2 * sizeof(float)); -#endif -} diff --git a/configure.ac b/configure.ac index 42fcde63..2bfe8253 100644 --- a/configure.ac +++ b/configure.ac @@ -101,6 +101,7 @@ AC_CONFIG_FILES([\ CommonLibs/Makefile \ GSM/Makefile \ Transceiver52M/Makefile \ + Transceiver52M/x86/Makefile \ sqlite3/Makefile \ ])