Transceiver52M: Replace convolve and related calls with SSE implementation

This large patch replaces the convolve() call with an SSE vector-
enabled version. The lower-level C and SSE intrinsic-based code operates
on fixed and aligned vectors for the filter taps. The storage format
of interleaved I/Q for both complex and real vectors is maintained.

SSE filter tap values must:

  1. Start 16-byte aligned
  2. Number a multiple of 4 between 4 and 20 for real taps
  3. Number a multiple of 4 for complex taps

Non-compliant values will fall back to non-SSE usage. Fixed length
iterators mean that head and tail cases may require reallocation of
the input vector, which is automatically handled by the upper C++
interface.

Other calls are affected by these changes and adjusted or rewritten
accordingly. The underlying algorithms, however, are unchanged.

  generateGSMPulse()
  analyzeTrafficBurst()
  detectRACHBurst()

Intel SSE configuration is automatically detected and configured at
build time with Autoconf macros.

Signed-off-by: Thomas Tsou <tom@tsou.cc>
This commit is contained in:
Thomas Tsou 2013-08-20 19:31:14 -04:00
parent e57004d0c3
commit 3eaae80c90
12 changed files with 1569 additions and 410 deletions

View File

@ -20,6 +20,7 @@
include $(top_srcdir)/Makefile.common include $(top_srcdir)/Makefile.common
ACLOCAL_AMFLAGS = -I config
AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(USB_INCLUDES) $(WITH_INCLUDES) AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(USB_INCLUDES) $(WITH_INCLUDES)
AM_CXXFLAGS = -Wall -pthread -ldl AM_CXXFLAGS = -Wall -pthread -ldl
#AM_CXXFLAGS = -Wall -O2 -NDEBUG -pthread -ldl #AM_CXXFLAGS = -Wall -O2 -NDEBUG -pthread -ldl

View File

@ -21,19 +21,18 @@
include $(top_srcdir)/Makefile.common include $(top_srcdir)/Makefile.common
AM_CFLAGS = $(STD_DEFINES_AND_INCLUDES) -std=gnu99 -march=native
AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES)
AM_CXXFLAGS = -ldl -lpthread
#UHD wins if both are defined #UHD wins if both are defined
if UHD if UHD
AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(UHD_CFLAGS) AM_CPPFLAGS += $(UHD_CFLAGS)
else else
if USRP1 if USRP1
AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(USRP_CFLAGS) AM_CPPFLAGS += $(USRP_CFLAGS)
else
#we should never be here, as this doesn't build if one of the above
#doesn't exist
AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES)
endif endif
endif endif
AM_CXXFLAGS = -ldl -lpthread
rev2dir = $(datadir)/usrp/rev2 rev2dir = $(datadir)/usrp/rev2
rev4dir = $(datadir)/usrp/rev4 rev4dir = $(datadir)/usrp/rev4
@ -53,7 +52,8 @@ COMMON_SOURCES = \
radioClock.cpp \ radioClock.cpp \
sigProcLib.cpp \ sigProcLib.cpp \
Transceiver.cpp \ Transceiver.cpp \
DummyLoad.cpp DummyLoad.cpp \
convolve.c
libtransceiver_la_SOURCES = \ libtransceiver_la_SOURCES = \
$(COMMON_SOURCES) \ $(COMMON_SOURCES) \
@ -75,7 +75,8 @@ noinst_HEADERS = \
USRPDevice.h \ USRPDevice.h \
DummyLoad.h \ DummyLoad.h \
rcvLPF_651.h \ rcvLPF_651.h \
sendLPF_961.h sendLPF_961.h \
convolve.h
USRPping_SOURCES = USRPping.cpp USRPping_SOURCES = USRPping.cpp
USRPping_LDADD = \ USRPping_LDADD = \

714
Transceiver52M/convolve.c Normal file
View File

@ -0,0 +1,714 @@
/*
* SSE Convolution
* Copyright (C) 2012, 2013 Thomas Tsou <tom@tsou.cc>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <malloc.h>
#include <string.h>
#include <stdio.h>
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#ifdef HAVE_SSE3
#include <xmmintrin.h>
#include <pmmintrin.h>
/*
 * 4-tap SSE complex-real convolution.
 *
 * x:   interleaved I/Q input; output sample n reads input samples n .. n + 3
 * h:   4 taps stored as float pairs, 16-byte aligned (aligned loads); only
 *      the first float of each pair is used
 * y:   interleaved I/Q output, len samples are written
 * len: number of output samples
 */
static void sse_conv_real4(float *restrict x,
			   float *restrict h,
			   float *restrict y,
			   int len)
{
	/* Gather the first float of every tap pair into one register */
	const __m128 taps = _mm_shuffle_ps(_mm_load_ps(&h[0]),
					   _mm_load_ps(&h[4]),
					   _MM_SHUFFLE(0, 2, 0, 2));

	for (int n = 0; n < len; n++) {
		float *in = &x[2 * n];
		float *out = &y[2 * n];

		/* Four consecutive I/Q samples (unaligned) */
		const __m128 lo = _mm_loadu_ps(in);
		const __m128 hi = _mm_loadu_ps(in + 4);

		/* De-interleave, then weight each component by the taps */
		const __m128 wi = _mm_mul_ps(
			_mm_shuffle_ps(lo, hi, _MM_SHUFFLE(0, 2, 0, 2)), taps);
		const __m128 wq = _mm_mul_ps(
			_mm_shuffle_ps(lo, hi, _MM_SHUFFLE(1, 3, 1, 3)), taps);

		/* Two horizontal adds leave sum(I) in lane 0, sum(Q) in lane 1 */
		__m128 sum = _mm_hadd_ps(wi, wq);
		sum = _mm_hadd_ps(sum, sum);

		_mm_store_ss(out, sum);
		/* Rotate lane 1 down to lane 0 so it can be stored as a scalar */
		sum = _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 3, 2, 1));
		_mm_store_ss(out + 1, sum);
	}
}
/*
 * 8-tap SSE complex-real convolution.
 *
 * x:   interleaved I/Q input; output sample n reads input samples n .. n + 7
 * h:   8 taps stored as float pairs, 16-byte aligned (aligned loads); only
 *      the first float of each pair is used
 * y:   interleaved I/Q output, len samples are written
 * len: number of output samples
 */
static void sse_conv_real8(float *restrict x,
			   float *restrict h,
			   float *restrict y,
			   int len)
{
	/* First float of each tap pair, four taps per register */
	const __m128 taps0 = _mm_shuffle_ps(_mm_load_ps(&h[0]),
					    _mm_load_ps(&h[4]),
					    _MM_SHUFFLE(0, 2, 0, 2));
	const __m128 taps1 = _mm_shuffle_ps(_mm_load_ps(&h[8]),
					    _mm_load_ps(&h[12]),
					    _MM_SHUFFLE(0, 2, 0, 2));

	for (int n = 0; n < len; n++) {
		float *in = &x[2 * n];
		float *out = &y[2 * n];

		/* Eight consecutive I/Q samples (unaligned) */
		const __m128 v0 = _mm_loadu_ps(in + 0);
		const __m128 v1 = _mm_loadu_ps(in + 4);
		const __m128 v2 = _mm_loadu_ps(in + 8);
		const __m128 v3 = _mm_loadu_ps(in + 12);

		/* De-interleave each group of four and weight by its taps */
		const __m128 i0 = _mm_mul_ps(
			_mm_shuffle_ps(v0, v1, _MM_SHUFFLE(0, 2, 0, 2)), taps0);
		const __m128 q0 = _mm_mul_ps(
			_mm_shuffle_ps(v0, v1, _MM_SHUFFLE(1, 3, 1, 3)), taps0);
		const __m128 i1 = _mm_mul_ps(
			_mm_shuffle_ps(v2, v3, _MM_SHUFFLE(0, 2, 0, 2)), taps1);
		const __m128 q1 = _mm_mul_ps(
			_mm_shuffle_ps(v2, v3, _MM_SHUFFLE(1, 3, 1, 3)), taps1);

		/* Vertical add of the groups, then horizontal reduction */
		__m128 sum = _mm_hadd_ps(_mm_add_ps(i0, i1), _mm_add_ps(q0, q1));
		sum = _mm_hadd_ps(sum, sum);

		_mm_store_ss(out, sum);
		/* Rotate lane 1 down to lane 0 so it can be stored as a scalar */
		sum = _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 3, 2, 1));
		_mm_store_ss(out + 1, sum);
	}
}
/*
 * 12-tap SSE complex-real convolution.
 *
 * x:   interleaved I/Q input; output sample n reads input samples n .. n + 11
 * h:   12 taps stored as float pairs, 16-byte aligned (aligned loads); only
 *      the first float of each pair is used
 * y:   interleaved I/Q output, len samples are written
 * len: number of output samples
 */
static void sse_conv_real12(float *restrict x,
			    float *restrict h,
			    float *restrict y,
			    int len)
{
	/* First float of each tap pair, four taps per register */
	const __m128 taps0 = _mm_shuffle_ps(_mm_load_ps(&h[0]),
					    _mm_load_ps(&h[4]),
					    _MM_SHUFFLE(0, 2, 0, 2));
	const __m128 taps1 = _mm_shuffle_ps(_mm_load_ps(&h[8]),
					    _mm_load_ps(&h[12]),
					    _MM_SHUFFLE(0, 2, 0, 2));
	const __m128 taps2 = _mm_shuffle_ps(_mm_load_ps(&h[16]),
					    _mm_load_ps(&h[20]),
					    _MM_SHUFFLE(0, 2, 0, 2));

	for (int n = 0; n < len; n++) {
		float *in = &x[2 * n];
		float *out = &y[2 * n];

		/* Twelve consecutive I/Q samples (unaligned) */
		const __m128 v0 = _mm_loadu_ps(in + 0);
		const __m128 v1 = _mm_loadu_ps(in + 4);
		const __m128 v2 = _mm_loadu_ps(in + 8);
		const __m128 v3 = _mm_loadu_ps(in + 12);
		const __m128 v4 = _mm_loadu_ps(in + 16);
		const __m128 v5 = _mm_loadu_ps(in + 20);

		/* De-interleave each group of four and weight by its taps */
		const __m128 i0 = _mm_mul_ps(
			_mm_shuffle_ps(v0, v1, _MM_SHUFFLE(0, 2, 0, 2)), taps0);
		const __m128 q0 = _mm_mul_ps(
			_mm_shuffle_ps(v0, v1, _MM_SHUFFLE(1, 3, 1, 3)), taps0);
		const __m128 i1 = _mm_mul_ps(
			_mm_shuffle_ps(v2, v3, _MM_SHUFFLE(0, 2, 0, 2)), taps1);
		const __m128 q1 = _mm_mul_ps(
			_mm_shuffle_ps(v2, v3, _MM_SHUFFLE(1, 3, 1, 3)), taps1);
		const __m128 i2 = _mm_mul_ps(
			_mm_shuffle_ps(v4, v5, _MM_SHUFFLE(0, 2, 0, 2)), taps2);
		const __m128 q2 = _mm_mul_ps(
			_mm_shuffle_ps(v4, v5, _MM_SHUFFLE(1, 3, 1, 3)), taps2);

		/* Accumulate the groups in order: (g0 + g1) + g2 */
		const __m128 sum_i = _mm_add_ps(_mm_add_ps(i0, i1), i2);
		const __m128 sum_q = _mm_add_ps(_mm_add_ps(q0, q1), q2);

		/* Horizontal reduction: sum(I) in lane 0, sum(Q) in lane 1 */
		__m128 sum = _mm_hadd_ps(sum_i, sum_q);
		sum = _mm_hadd_ps(sum, sum);

		_mm_store_ss(out, sum);
		/* Rotate lane 1 down to lane 0 so it can be stored as a scalar */
		sum = _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 3, 2, 1));
		_mm_store_ss(out + 1, sum);
	}
}
/*
 * 16-tap SSE complex-real convolution.
 *
 * x:   interleaved I/Q input; output sample n reads input samples n .. n + 15
 * h:   16 taps stored as float pairs, 16-byte aligned (aligned loads); only
 *      the first float of each pair is used
 * y:   interleaved I/Q output, len samples are written
 * len: number of output samples
 */
static void sse_conv_real16(float *restrict x,
			    float *restrict h,
			    float *restrict y,
			    int len)
{
	/* First float of each tap pair, four taps per register */
	const __m128 taps0 = _mm_shuffle_ps(_mm_load_ps(&h[0]),
					    _mm_load_ps(&h[4]),
					    _MM_SHUFFLE(0, 2, 0, 2));
	const __m128 taps1 = _mm_shuffle_ps(_mm_load_ps(&h[8]),
					    _mm_load_ps(&h[12]),
					    _MM_SHUFFLE(0, 2, 0, 2));
	const __m128 taps2 = _mm_shuffle_ps(_mm_load_ps(&h[16]),
					    _mm_load_ps(&h[20]),
					    _MM_SHUFFLE(0, 2, 0, 2));
	const __m128 taps3 = _mm_shuffle_ps(_mm_load_ps(&h[24]),
					    _mm_load_ps(&h[28]),
					    _MM_SHUFFLE(0, 2, 0, 2));

	for (int n = 0; n < len; n++) {
		float *in = &x[2 * n];
		float *out = &y[2 * n];

		/* Sixteen consecutive I/Q samples (unaligned) */
		const __m128 v0 = _mm_loadu_ps(in + 0);
		const __m128 v1 = _mm_loadu_ps(in + 4);
		const __m128 v2 = _mm_loadu_ps(in + 8);
		const __m128 v3 = _mm_loadu_ps(in + 12);
		const __m128 v4 = _mm_loadu_ps(in + 16);
		const __m128 v5 = _mm_loadu_ps(in + 20);
		const __m128 v6 = _mm_loadu_ps(in + 24);
		const __m128 v7 = _mm_loadu_ps(in + 28);

		/* De-interleave each group of four and weight by its taps */
		const __m128 i0 = _mm_mul_ps(
			_mm_shuffle_ps(v0, v1, _MM_SHUFFLE(0, 2, 0, 2)), taps0);
		const __m128 q0 = _mm_mul_ps(
			_mm_shuffle_ps(v0, v1, _MM_SHUFFLE(1, 3, 1, 3)), taps0);
		const __m128 i1 = _mm_mul_ps(
			_mm_shuffle_ps(v2, v3, _MM_SHUFFLE(0, 2, 0, 2)), taps1);
		const __m128 q1 = _mm_mul_ps(
			_mm_shuffle_ps(v2, v3, _MM_SHUFFLE(1, 3, 1, 3)), taps1);
		const __m128 i2 = _mm_mul_ps(
			_mm_shuffle_ps(v4, v5, _MM_SHUFFLE(0, 2, 0, 2)), taps2);
		const __m128 q2 = _mm_mul_ps(
			_mm_shuffle_ps(v4, v5, _MM_SHUFFLE(1, 3, 1, 3)), taps2);
		const __m128 i3 = _mm_mul_ps(
			_mm_shuffle_ps(v6, v7, _MM_SHUFFLE(0, 2, 0, 2)), taps3);
		const __m128 q3 = _mm_mul_ps(
			_mm_shuffle_ps(v6, v7, _MM_SHUFFLE(1, 3, 1, 3)), taps3);

		/* Pairwise accumulation: (g0 + g1) + (g2 + g3) */
		const __m128 sum_i = _mm_add_ps(_mm_add_ps(i0, i1),
						_mm_add_ps(i2, i3));
		const __m128 sum_q = _mm_add_ps(_mm_add_ps(q0, q1),
						_mm_add_ps(q2, q3));

		/* Horizontal reduction: sum(I) in lane 0, sum(Q) in lane 1 */
		__m128 sum = _mm_hadd_ps(sum_i, sum_q);
		sum = _mm_hadd_ps(sum, sum);

		_mm_store_ss(out, sum);
		/* Rotate lane 1 down to lane 0 so it can be stored as a scalar */
		sum = _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 3, 2, 1));
		_mm_store_ss(out + 1, sum);
	}
}
/*
 * 20-tap SSE complex-real convolution.
 *
 * x:   interleaved I/Q input; output sample n reads input samples n .. n + 19
 * h:   20 taps stored as float pairs, 16-byte aligned (aligned loads); only
 *      the first float of each pair is used
 * y:   interleaved I/Q output, len samples are written
 * len: number of output samples
 */
static void sse_conv_real20(float *restrict x,
			    float *restrict h,
			    float *restrict y,
			    int len)
{
	/* First float of each tap pair, four taps per register */
	const __m128 taps0 = _mm_shuffle_ps(_mm_load_ps(&h[0]),
					    _mm_load_ps(&h[4]),
					    _MM_SHUFFLE(0, 2, 0, 2));
	const __m128 taps1 = _mm_shuffle_ps(_mm_load_ps(&h[8]),
					    _mm_load_ps(&h[12]),
					    _MM_SHUFFLE(0, 2, 0, 2));
	const __m128 taps2 = _mm_shuffle_ps(_mm_load_ps(&h[16]),
					    _mm_load_ps(&h[20]),
					    _MM_SHUFFLE(0, 2, 0, 2));
	const __m128 taps3 = _mm_shuffle_ps(_mm_load_ps(&h[24]),
					    _mm_load_ps(&h[28]),
					    _MM_SHUFFLE(0, 2, 0, 2));
	const __m128 taps4 = _mm_shuffle_ps(_mm_load_ps(&h[32]),
					    _mm_load_ps(&h[36]),
					    _MM_SHUFFLE(0, 2, 0, 2));

	for (int n = 0; n < len; n++) {
		float *in = &x[2 * n];
		float *out = &y[2 * n];

		/* First 12 taps: load, de-interleave, weight */
		const __m128 v0 = _mm_loadu_ps(in + 0);
		const __m128 v1 = _mm_loadu_ps(in + 4);
		const __m128 v2 = _mm_loadu_ps(in + 8);
		const __m128 v3 = _mm_loadu_ps(in + 12);
		const __m128 v4 = _mm_loadu_ps(in + 16);
		const __m128 v5 = _mm_loadu_ps(in + 20);

		const __m128 i0 = _mm_mul_ps(
			_mm_shuffle_ps(v0, v1, _MM_SHUFFLE(0, 2, 0, 2)), taps0);
		const __m128 q0 = _mm_mul_ps(
			_mm_shuffle_ps(v0, v1, _MM_SHUFFLE(1, 3, 1, 3)), taps0);
		const __m128 i1 = _mm_mul_ps(
			_mm_shuffle_ps(v2, v3, _MM_SHUFFLE(0, 2, 0, 2)), taps1);
		const __m128 q1 = _mm_mul_ps(
			_mm_shuffle_ps(v2, v3, _MM_SHUFFLE(1, 3, 1, 3)), taps1);
		const __m128 i2 = _mm_mul_ps(
			_mm_shuffle_ps(v4, v5, _MM_SHUFFLE(0, 2, 0, 2)), taps2);
		const __m128 q2 = _mm_mul_ps(
			_mm_shuffle_ps(v4, v5, _MM_SHUFFLE(1, 3, 1, 3)), taps2);

		/* (g0 + g1) + g2 */
		const __m128 head_i = _mm_add_ps(_mm_add_ps(i0, i1), i2);
		const __m128 head_q = _mm_add_ps(_mm_add_ps(q0, q1), q2);

		/* Last 8 taps: load, de-interleave, weight */
		const __m128 v6 = _mm_loadu_ps(in + 24);
		const __m128 v7 = _mm_loadu_ps(in + 28);
		const __m128 v8 = _mm_loadu_ps(in + 32);
		const __m128 v9 = _mm_loadu_ps(in + 36);

		const __m128 i3 = _mm_mul_ps(
			_mm_shuffle_ps(v6, v7, _MM_SHUFFLE(0, 2, 0, 2)), taps3);
		const __m128 q3 = _mm_mul_ps(
			_mm_shuffle_ps(v6, v7, _MM_SHUFFLE(1, 3, 1, 3)), taps3);
		const __m128 i4 = _mm_mul_ps(
			_mm_shuffle_ps(v8, v9, _MM_SHUFFLE(0, 2, 0, 2)), taps4);
		const __m128 q4 = _mm_mul_ps(
			_mm_shuffle_ps(v8, v9, _MM_SHUFFLE(1, 3, 1, 3)), taps4);

		/* g3 + g4 */
		const __m128 tail_i = _mm_add_ps(i3, i4);
		const __m128 tail_q = _mm_add_ps(q3, q4);

		/* Combine both halves, then reduce horizontally */
		__m128 sum = _mm_hadd_ps(_mm_add_ps(head_i, tail_i),
					 _mm_add_ps(head_q, tail_q));
		sum = _mm_hadd_ps(sum, sum);

		_mm_store_ss(out, sum);
		/* Rotate lane 1 down to lane 0 so it can be stored as a scalar */
		sum = _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 3, 2, 1));
		_mm_store_ss(out + 1, sum);
	}
}
/*
 * 4*N-tap SSE complex-real convolution.
 *
 * x:     interleaved I/Q input; output sample n reads input samples starting
 *        at n, four per processed tap group
 * h:     taps stored as float pairs, 16-byte aligned (aligned loads); only
 *        the first float of each pair is used
 * y:     interleaved I/Q output, len samples are written
 * h_len: number of taps; h_len / 4 groups of four taps are processed
 * len:   number of output samples
 */
static void sse_conv_real4n(float *x, float *h, float *y, int h_len, int len)
{
	const int groups = h_len / 4;

	for (int n = 0; n < len; n++) {
		float *out = &y[2 * n];
		__m128 acc_i = _mm_setzero_ps();
		__m128 acc_q = _mm_setzero_ps();

		for (int g = 0; g < groups; g++) {
			float *hp = &h[8 * g];
			float *xp = &x[2 * n + 8 * g];

			/* Four taps: keep the first float of each pair */
			const __m128 taps = _mm_shuffle_ps(
				_mm_load_ps(hp), _mm_load_ps(hp + 4),
				_MM_SHUFFLE(0, 2, 0, 2));

			/* Four I/Q samples (unaligned), split into I and Q */
			const __m128 lo = _mm_loadu_ps(xp);
			const __m128 hi = _mm_loadu_ps(xp + 4);
			const __m128 in_i = _mm_shuffle_ps(
				lo, hi, _MM_SHUFFLE(0, 2, 0, 2));
			const __m128 in_q = _mm_shuffle_ps(
				lo, hi, _MM_SHUFFLE(1, 3, 1, 3));

			/* Multiply-accumulate per lane */
			acc_i = _mm_add_ps(acc_i, _mm_mul_ps(taps, in_i));
			acc_q = _mm_add_ps(acc_q, _mm_mul_ps(taps, in_q));
		}

		/* Horizontal reduction: sum(I) in lane 0, sum(Q) in lane 1 */
		__m128 sum = _mm_hadd_ps(acc_i, acc_q);
		sum = _mm_hadd_ps(sum, sum);

		_mm_store_ss(out, sum);
		/* Rotate lane 1 down to lane 0 so it can be stored as a scalar */
		sum = _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 3, 2, 1));
		_mm_store_ss(out + 1, sum);
	}
}
/*
 * 4*N-tap SSE complex-complex convolution.
 *
 * x:     interleaved I/Q input; output sample n reads input samples starting
 *        at n, four per processed tap group
 * h:     interleaved complex taps, 16-byte aligned (aligned loads)
 * y:     interleaved I/Q output, len samples are written
 * h_len: number of taps; h_len / 4 groups of four taps are processed
 * len:   number of output samples
 */
static void sse_conv_cmplx_4n(float *x, float *h, float *y, int h_len, int len)
{
	const int groups = h_len / 4;

	for (int n = 0; n < len; n++) {
		float *out = &y[2 * n];
		__m128 acc_re = _mm_setzero_ps();
		__m128 acc_im = _mm_setzero_ps();

		for (int g = 0; g < groups; g++) {
			float *hp = &h[8 * g];
			float *xp = &x[2 * n + 8 * g];

			/* Four taps, split into real and imaginary lanes */
			const __m128 h_lo = _mm_load_ps(hp);
			const __m128 h_hi = _mm_load_ps(hp + 4);
			const __m128 h_re = _mm_shuffle_ps(
				h_lo, h_hi, _MM_SHUFFLE(0, 2, 0, 2));
			const __m128 h_im = _mm_shuffle_ps(
				h_lo, h_hi, _MM_SHUFFLE(1, 3, 1, 3));

			/* Four samples (unaligned), split the same way */
			const __m128 x_lo = _mm_loadu_ps(xp);
			const __m128 x_hi = _mm_loadu_ps(xp + 4);
			const __m128 x_re = _mm_shuffle_ps(
				x_lo, x_hi, _MM_SHUFFLE(0, 2, 0, 2));
			const __m128 x_im = _mm_shuffle_ps(
				x_lo, x_hi, _MM_SHUFFLE(1, 3, 1, 3));

			/* Complex product: (h_re + j h_im) * (x_re + j x_im) */
			const __m128 prod_re = _mm_sub_ps(_mm_mul_ps(h_re, x_re),
							  _mm_mul_ps(h_im, x_im));
			const __m128 prod_im = _mm_add_ps(_mm_mul_ps(h_re, x_im),
							  _mm_mul_ps(h_im, x_re));

			acc_re = _mm_add_ps(acc_re, prod_re);
			acc_im = _mm_add_ps(acc_im, prod_im);
		}

		/* Horizontal reduction: real sum in lane 0, imaginary in lane 1 */
		__m128 sum = _mm_hadd_ps(acc_re, acc_im);
		sum = _mm_hadd_ps(sum, sum);

		_mm_store_ss(out, sum);
		/* Rotate lane 1 down to lane 0 so it can be stored as a scalar */
		sum = _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 3, 2, 1));
		_mm_store_ss(out + 1, sum);
	}
}
/*
 * 8*N-tap SSE complex-complex convolution.
 *
 * x:     interleaved I/Q input; output sample n reads input samples starting
 *        at n, eight per processed tap group
 * h:     interleaved complex taps, 16-byte aligned (aligned loads)
 * y:     interleaved I/Q output, len samples are written
 * h_len: number of taps; h_len / 8 groups of eight taps are processed
 * len:   number of output samples
 */
static void sse_conv_cmplx_8n(float *x, float *h, float *y, int h_len, int len)
{
	const int groups = h_len / 8;

	for (int n = 0; n < len; n++) {
		float *out = &y[2 * n];

		/* Separate accumulators for the low/high half of each group */
		__m128 acc_re_lo = _mm_setzero_ps();
		__m128 acc_re_hi = _mm_setzero_ps();
		__m128 acc_im_lo = _mm_setzero_ps();
		__m128 acc_im_hi = _mm_setzero_ps();

		for (int g = 0; g < groups; g++) {
			float *hp = &h[16 * g];
			float *xp = &x[2 * n + 16 * g];

			/* Eight taps, split into real and imaginary lanes */
			const __m128 t0 = _mm_load_ps(hp + 0);
			const __m128 t1 = _mm_load_ps(hp + 4);
			const __m128 t2 = _mm_load_ps(hp + 8);
			const __m128 t3 = _mm_load_ps(hp + 12);
			const __m128 h_re_lo = _mm_shuffle_ps(
				t0, t1, _MM_SHUFFLE(0, 2, 0, 2));
			const __m128 h_im_lo = _mm_shuffle_ps(
				t0, t1, _MM_SHUFFLE(1, 3, 1, 3));
			/*
			 * Real parts of taps 4..7 come from both t2 and t3;
			 * shuffling t2 with itself would duplicate taps 4/5
			 * in place of taps 6/7.
			 */
			const __m128 h_re_hi = _mm_shuffle_ps(
				t2, t3, _MM_SHUFFLE(0, 2, 0, 2));
			const __m128 h_im_hi = _mm_shuffle_ps(
				t2, t3, _MM_SHUFFLE(1, 3, 1, 3));

			/* Eight samples (unaligned), split the same way */
			const __m128 v0 = _mm_loadu_ps(xp + 0);
			const __m128 v1 = _mm_loadu_ps(xp + 4);
			const __m128 v2 = _mm_loadu_ps(xp + 8);
			const __m128 v3 = _mm_loadu_ps(xp + 12);
			const __m128 x_re_lo = _mm_shuffle_ps(
				v0, v1, _MM_SHUFFLE(0, 2, 0, 2));
			const __m128 x_im_lo = _mm_shuffle_ps(
				v0, v1, _MM_SHUFFLE(1, 3, 1, 3));
			const __m128 x_re_hi = _mm_shuffle_ps(
				v2, v3, _MM_SHUFFLE(0, 2, 0, 2));
			const __m128 x_im_hi = _mm_shuffle_ps(
				v2, v3, _MM_SHUFFLE(1, 3, 1, 3));

			/* Complex products: (h_re + j h_im) * (x_re + j x_im) */
			const __m128 re_lo = _mm_sub_ps(
				_mm_mul_ps(h_re_lo, x_re_lo),
				_mm_mul_ps(h_im_lo, x_im_lo));
			const __m128 re_hi = _mm_sub_ps(
				_mm_mul_ps(h_re_hi, x_re_hi),
				_mm_mul_ps(h_im_hi, x_im_hi));
			const __m128 im_lo = _mm_add_ps(
				_mm_mul_ps(h_re_lo, x_im_lo),
				_mm_mul_ps(h_im_lo, x_re_lo));
			const __m128 im_hi = _mm_add_ps(
				_mm_mul_ps(h_re_hi, x_im_hi),
				_mm_mul_ps(h_im_hi, x_re_hi));

			acc_re_lo = _mm_add_ps(acc_re_lo, re_lo);
			acc_re_hi = _mm_add_ps(acc_re_hi, re_hi);
			acc_im_lo = _mm_add_ps(acc_im_lo, im_lo);
			acc_im_hi = _mm_add_ps(acc_im_hi, im_hi);
		}

		/* Merge the halves, then reduce horizontally */
		__m128 sum = _mm_hadd_ps(_mm_add_ps(acc_re_lo, acc_re_hi),
					 _mm_add_ps(acc_im_lo, acc_im_hi));
		sum = _mm_hadd_ps(sum, sum);

		_mm_store_ss(out, sum);
		/* Rotate lane 1 down to lane 0 so it can be stored as a scalar */
		sum = _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 3, 2, 1));
		_mm_store_ss(out + 1, sum);
	}
}
#endif
/*
 * Base multiply and accumulate, complex-real: adds the complex sample at x
 * scaled by the first float of the tap at h into the complex value at y.
 * The second float of the tap is not read.
 */
static void mac_real(float *x, float *h, float *y)
{
	enum { RE, IM };

	y[RE] += x[RE] * h[RE];
	y[IM] += x[IM] * h[RE];
}
/*
 * Base multiply and accumulate, complex-complex: adds the complex product
 * of the sample at x and the tap at h into the complex value at y.
 */
static void mac_cmplx(float *x, float *h, float *y)
{
	enum { RE, IM };

	y[RE] += x[RE] * h[RE] - x[IM] * h[IM];
	y[IM] += x[RE] * h[IM] + x[IM] * h[RE];
}
/*
 * Base vector complex-real multiply and accumulate.
 *
 * Accumulates x[k] * h[k] into the single complex value at y for tap
 * indices k = offset, offset + step, ... while k < len. x and h hold
 * interleaved float pairs; only the first float of each tap is used.
 */
static void mac_real_vec_n(float *x, float *h, float *y,
			   int len, int step, int offset)
{
	int k = offset;

	while (k < len) {
		mac_real(x + 2 * k, h + 2 * k, y);
		k += step;
	}
}
/*
 * Base vector complex-complex multiply and accumulate.
 *
 * Accumulates the complex products x[k] * h[k] into the single complex
 * value at y for tap indices k = offset, offset + step, ... while k < len.
 * x and h hold interleaved float pairs.
 */
static void mac_cmplx_vec_n(float *x, float *h, float *y,
			    int len, int step, int offset)
{
	int k = offset;

	while (k < len) {
		mac_cmplx(x + 2 * k, h + 2 * k, y);
		k += step;
	}
}
/*
 * Base complex-real convolution (no SSE).
 *
 * For each of the len outputs, accumulates the taps selected by step/offset
 * against the input window that ends at x[start + n]. The output is only
 * accumulated into, so the caller is expected to have cleared y. x_len and
 * y_len are not used here; validation is done by the callers.
 *
 * Returns len.
 */
static int _base_convolve_real(float *x, int x_len,
			       float *h, int h_len,
			       float *y, int y_len,
			       int start, int len,
			       int step, int offset)
{
	int n;

	for (n = 0; n < len; n++) {
		/* Input sample lined up with tap 0 for output n */
		float *window = &x[2 * (n - (h_len - 1) + start)];

		mac_real_vec_n(window, h, &y[2 * n], h_len, step, offset);
	}

	return len;
}
/*
 * Base complex-complex convolution (no SSE).
 *
 * For each of the len outputs, accumulates the taps selected by step/offset
 * against the input window that ends at x[start + n]. The output is only
 * accumulated into, so the caller is expected to have cleared y. x_len and
 * y_len are not used here; validation is done by the callers.
 *
 * Returns len.
 */
static int _base_convolve_complex(float *x, int x_len,
				  float *h, int h_len,
				  float *y, int y_len,
				  int start, int len,
				  int step, int offset)
{
	int n;

	for (n = 0; n < len; n++) {
		/* Input sample lined up with tap 0 for output n */
		float *window = &x[2 * (n - (h_len - 1) + start)];

		mac_cmplx_vec_n(window, h, &y[2 * n], h_len, step, offset);
	}

	return len;
}
/*
 * Buffer validity checks.
 *
 * Rejects non-positive lengths or step, an output window (start + len)
 * running past the input, more outputs than y can hold, and an input
 * shorter than the filter. A diagnostic is printed to stderr on failure.
 *
 * Returns 0 when the parameters are acceptable, -1 otherwise.
 */
static int bounds_check(int x_len, int h_len, int y_len,
			int start, int len, int step)
{
	const int all_positive = (x_len >= 1) && (h_len >= 1) &&
				 (y_len >= 1) && (len >= 1) && (step >= 1);

	if (!all_positive) {
		fprintf(stderr, "Convolve: Invalid input\n");
		return -1;
	}

	const int fits = (start + len <= x_len) && (len <= y_len) &&
			 (x_len >= h_len);

	if (fits)
		return 0;

	fprintf(stderr, "Convolve: Boundary exception\n");
	fprintf(stderr, "start: %i, len: %i, x: %i, h: %i, y: %i\n",
		start, len, x_len, h_len, y_len);
	return -1;
}
/*
 * API: Aligned complex-real convolution.
 *
 * Validates the parameters, clears the first len outputs of y and computes
 * len output samples. When built with HAVE_SSE3, step <= 4 and h_len is a
 * multiple of 4, an SSE kernel is used (dedicated kernels for 4, 8, 12, 16
 * and 20 taps, the generic 4n kernel otherwise); those kernels use aligned
 * loads on h and take neither step nor offset. Every other case goes through
 * the base (non-SSE) loop.
 *
 * NOTE(review): the SSE kernels sum every tap, while the base loop sums only
 * taps offset, offset + step, ... - confirm callers expect this.
 *
 * Returns len on success, -1 if the bounds check fails.
 */
int convolve_real(float *x, int x_len,
		  float *h, int h_len,
		  float *y, int y_len,
		  int start, int len,
		  int step, int offset)
{
	void (*fixed_kernel)(float *, float *, float *, int) = NULL;
	void (*var_kernel)(float *, float *, float *, int, int) = NULL;

	if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
		return -1;

	memset(y, 0, len * 2 * sizeof(float));

#ifdef HAVE_SSE3
	if (step <= 4) {
		if (h_len == 4)
			fixed_kernel = sse_conv_real4;
		else if (h_len == 8)
			fixed_kernel = sse_conv_real8;
		else if (h_len == 12)
			fixed_kernel = sse_conv_real12;
		else if (h_len == 16)
			fixed_kernel = sse_conv_real16;
		else if (h_len == 20)
			fixed_kernel = sse_conv_real20;
		else if (h_len % 4 == 0)
			var_kernel = sse_conv_real4n;
	}
#endif

	if (fixed_kernel) {
		/* Kernels expect x to point at the sample under tap 0 */
		fixed_kernel(&x[2 * (-(h_len - 1) + start)], h, y, len);
	} else if (var_kernel) {
		var_kernel(&x[2 * (-(h_len - 1) + start)], h, y, h_len, len);
	} else {
		_base_convolve_real(x, x_len, h, h_len, y, y_len,
				    start, len, step, offset);
	}

	return len;
}
/*
 * API: Aligned complex-complex convolution.
 *
 * Validates the parameters, clears the first len outputs of y and computes
 * len output samples. When built with HAVE_SSE3, step <= 4 and h_len is a
 * multiple of 4, an SSE kernel is used (the 8n kernel when h_len is also a
 * multiple of 8, the 4n kernel otherwise); those kernels use aligned loads
 * on h and take neither step nor offset. Every other case goes through the
 * base (non-SSE) loop.
 *
 * NOTE(review): the SSE kernels sum every tap, while the base loop sums only
 * taps offset, offset + step, ... - confirm callers expect this.
 *
 * Returns len on success, -1 if the bounds check fails.
 */
int convolve_complex(float *x, int x_len,
		     float *h, int h_len,
		     float *y, int y_len,
		     int start, int len,
		     int step, int offset)
{
	void (*kernel)(float *, float *, float *, int, int) = NULL;

	if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
		return -1;

	memset(y, 0, len * 2 * sizeof(float));

#ifdef HAVE_SSE3
	if ((step <= 4) && (h_len % 4 == 0))
		kernel = (h_len % 8 == 0) ? sse_conv_cmplx_8n
					  : sse_conv_cmplx_4n;
#endif

	if (kernel) {
		/* Kernels expect x to point at the sample under tap 0 */
		kernel(&x[2 * (-(h_len - 1) + start)], h, y, h_len, len);
	} else {
		_base_convolve_complex(x, x_len, h, h_len, y, y_len,
				       start, len, step, offset);
	}

	return len;
}
/*
 * API: Non-aligned (no SSE) complex-real convolution.
 *
 * Validates the parameters, clears the first len outputs of y and runs the
 * base loop, so h needs no particular alignment.
 *
 * Returns len on success, -1 if the bounds check fails.
 */
int base_convolve_real(float *x, int x_len,
		       float *h, int h_len,
		       float *y, int y_len,
		       int start, int len,
		       int step, int offset)
{
	const int rc = bounds_check(x_len, h_len, y_len, start, len, step);

	if (rc >= 0) {
		memset(y, 0, len * 2 * sizeof(float));
		return _base_convolve_real(x, x_len, h, h_len, y, y_len,
					   start, len, step, offset);
	}

	return -1;
}
/*
 * API: Non-aligned (no SSE) complex-complex convolution.
 *
 * Validates the parameters, clears the first len outputs of y and runs the
 * base loop, so h needs no particular alignment.
 *
 * Returns len on success, -1 if the bounds check fails.
 */
int base_convolve_complex(float *x, int x_len,
			  float *h, int h_len,
			  float *y, int y_len,
			  int start, int len,
			  int step, int offset)
{
	const int rc = bounds_check(x_len, h_len, y_len, start, len, step);

	if (rc >= 0) {
		memset(y, 0, len * 2 * sizeof(float));
		return _base_convolve_complex(x, x_len, h, h_len, y, y_len,
					      start, len, step, offset);
	}

	return -1;
}
/*
 * Aligned filter tap allocation.
 *
 * Allocates uninitialized storage for len taps of two floats each. When
 * built with HAVE_SSE3 the buffer is 16-byte aligned, as required by the
 * aligned tap loads in the SSE kernels.
 *
 * Returns NULL if len is less than 1, if the byte count would not fit in
 * size_t, or if the allocation fails.
 */
void *convolve_h_alloc(int len)
{
	const size_t tap_size = 2 * sizeof(float);

	/* Reject empty/negative requests and size computations that wrap */
	if (len < 1 || (size_t) len > (size_t) -1 / tap_size)
		return NULL;

#ifdef HAVE_SSE3
	return memalign(16, (size_t) len * tap_size);
#else
	return malloc((size_t) len * tap_size);
#endif
}

30
Transceiver52M/convolve.h Normal file
View File

@ -0,0 +1,30 @@
#ifndef _CONVOLVE_H_
#define _CONVOLVE_H_
/*
 * Convolution API. All vectors are float arrays holding interleaved pairs
 * (two floats per sample or tap); every length, 'start' and 'len' below is
 * counted in pairs, not floats.
 *
 * Common parameters of the four convolve calls:
 *   x, x_len     - input samples and their count
 *   h, h_len     - filter taps and their count
 *   y, y_len     - output buffer and its capacity; the first 'len' outputs
 *                  are cleared and then written
 *   start        - input index lined up with the last tap for output 0;
 *                  output i sums x[start + i - (h_len - 1) + k] * h[k]
 *   len          - number of outputs to compute
 *   step, offset - tap stride and first tap index used by the non-SSE loops
 *
 * Each call returns 'len' on success or -1 when its length/bounds checks
 * fail (a message is printed to stderr).
 */

/*
 * Allocate uninitialized storage for 'num' taps (two floats each). The
 * buffer is 16-byte aligned when built with HAVE_SSE3.
 */
void *convolve_h_alloc(int num);
/*
 * Complex input, real taps (only the first float of each tap pair is used).
 * May use SSE kernels that perform aligned loads on h.
 */
int convolve_real(float *x, int x_len,
float *h, int h_len,
float *y, int y_len,
int start, int len,
int step, int offset);
/*
 * Complex input, complex taps. May use SSE kernels that perform aligned
 * loads on h.
 */
int convolve_complex(float *x, int x_len,
float *h, int h_len,
float *y, int y_len,
int start, int len,
int step, int offset);
/* Complex input, real taps; never uses SSE, so h needs no alignment. */
int base_convolve_real(float *x, int x_len,
float *h, int h_len,
float *y, int y_len,
int start, int len,
int step, int offset);
/* Complex input, complex taps; never uses SSE, so h needs no alignment. */
int base_convolve_complex(float *x, int x_len,
float *h, int h_len,
float *y, int y_len,
int start, int len,
int step, int offset);
#endif /* _CONVOLVE_H_ */

View File

@ -29,6 +29,10 @@
using namespace GSM; using namespace GSM;
extern "C" {
#include "convolve.h"
}
#define TABLESIZE 1024 #define TABLESIZE 1024
/** Lookup tables for trigonometric approximation */ /** Lookup tables for trigonometric approximation */
@ -45,28 +49,35 @@ signalVector *GMSKRotation = NULL;
signalVector *GMSKReverseRotation = NULL; signalVector *GMSKReverseRotation = NULL;
/* /*
* RACH and midamble correlation waveforms * RACH and midamble correlation waveforms. Store the buffer separately
* because we need to allocate it explicitly outside of the signal vector
* constructor. This is because C++ (prior to C++11) is unable to natively
* perform 16-byte memory alignment required by many SSE instructions.
*/ */
struct CorrelationSequence { struct CorrelationSequence {
CorrelationSequence() : sequence(NULL) CorrelationSequence() : sequence(NULL), buffer(NULL)
{ {
} }
~CorrelationSequence() ~CorrelationSequence()
{ {
delete sequence; delete sequence;
free(buffer);
} }
signalVector *sequence; signalVector *sequence;
void *buffer;
float TOA; float TOA;
complex gain; complex gain;
}; };
/* /*
* Gaussian and empty modulation pulses * Gaussian and empty modulation pulses. Like the correlation sequences,
* store the runtime (Gaussian) buffer separately because of needed alignment
* for SSE instructions.
*/ */
struct PulseSequence { struct PulseSequence {
PulseSequence() : gaussian(NULL), empty(NULL) PulseSequence() : gaussian(NULL), empty(NULL), buffer(NULL)
{ {
} }
@ -74,10 +85,12 @@ struct PulseSequence {
{ {
delete gaussian; delete gaussian;
delete empty; delete empty;
free(buffer);
} }
signalVector *gaussian; signalVector *gaussian;
signalVector *empty; signalVector *empty;
void *buffer;
}; };
CorrelationSequence *gMidambles[] = {NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}; CorrelationSequence *gMidambles[] = {NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL};
@ -246,7 +259,7 @@ void initGMSKRotationTables(int sps)
bool sigProcLibSetup(int sps) bool sigProcLibSetup(int sps)
{ {
if ((sps != 0) && (sps != 2) && (sps != 4)) if ((sps != 1) && (sps != 2) && (sps != 4))
return false; return false;
initTrigTables(); initTrigTables();
@ -295,174 +308,106 @@ void GMSKReverseRotate(signalVector &x) {
} }
} }
signalVector *convolve(const signalVector *x,
signalVector* convolve(const signalVector *a, const signalVector *h,
const signalVector *b, signalVector *y,
signalVector *c, ConvType spanType, int start,
ConvType spanType, unsigned len, unsigned step, int offset)
unsigned startIx,
unsigned len)
{ {
if ((a==NULL) || (b==NULL)) return NULL; int rc, head = 0, tail = 0;
int La = a->size(); bool alloc = false, append = false;
int Lb = b->size(); const signalVector *_x = NULL;
int startIndex; if (!x || !h)
unsigned int outSize;
switch (spanType) {
case FULL_SPAN:
startIndex = 0;
outSize = La+Lb-1;
break;
case OVERLAP_ONLY:
startIndex = La;
outSize = abs(La-Lb)+1;
break;
case START_ONLY:
startIndex = 0;
outSize = La;
break;
case WITH_TAIL:
startIndex = Lb;
outSize = La;
break;
case NO_DELAY:
if (Lb % 2)
startIndex = Lb/2;
else
startIndex = Lb/2-1;
outSize = La;
break;
case CUSTOM:
startIndex = startIx;
outSize = len;
break;
default:
return NULL;
}
if (c==NULL)
c = new signalVector(outSize);
else if (c->size()!=outSize)
return NULL; return NULL;
signalVector::const_iterator aStart = a->begin(); switch (spanType) {
signalVector::const_iterator bStart = b->begin(); case START_ONLY:
signalVector::const_iterator aEnd = a->end(); start = 0;
signalVector::const_iterator bEnd = b->end(); head = h->size();
signalVector::iterator cPtr = c->begin(); len = x->size();
int t = startIndex; append = true;
int stopIndex = startIndex + outSize;
switch (b->getSymmetry()) {
case NONE:
{
while (t < stopIndex) {
signalVector::const_iterator aP = aStart+t;
signalVector::const_iterator bP = bStart;
if (a->isRealOnly() && b->isRealOnly()) {
float sum = 0.0;
while (bP < bEnd) {
if (aP < aStart) break;
if (aP < aEnd) sum += (aP->real())*(bP->real());
aP--;
bP++;
}
*cPtr++ = sum;
}
else if (a->isRealOnly()) {
complex sum = 0.0;
while (bP < bEnd) {
if (aP < aStart) break;
if (aP < aEnd) sum += (*bP)*(aP->real());
aP--;
bP++;
}
*cPtr++ = sum;
}
else if (b->isRealOnly()) {
complex sum = 0.0;
while (bP < bEnd) {
if (aP < aStart) break;
if (aP < aEnd) sum += (*aP)*(bP->real());
aP--;
bP++;
}
*cPtr++ = sum;
}
else {
complex sum = 0.0;
while (bP < bEnd) {
if (aP < aStart) break;
if (aP < aEnd) sum += (*aP)*(*bP);
aP--;
bP++;
}
*cPtr++ = sum;
}
t++;
}
}
break; break;
case ABSSYM: case NO_DELAY:
{ start = h->size() / 2;
complex sum = 0.0; head = start;
bool isOdd = (bool) (Lb % 2); tail = start;
if (isOdd) len = x->size();
bEnd = bStart + (Lb+1)/2; append = true;
else break;
bEnd = bStart + Lb/2; case CUSTOM:
while (t < stopIndex) { if (start < h->size() - 1) {
signalVector::const_iterator aP = aStart+t; head = h->size() - start;
signalVector::const_iterator aPsym = aP-Lb+1; append = true;
signalVector::const_iterator bP = bStart; }
sum = 0.0; if (start + len > x->size()) {
if (!b->isRealOnly()) { tail = start + len - x->size();
while (bP < bEnd) { append = true;
if (aP < aStart) break;
if (aP == aPsym)
sum+= (*aP)*(*bP);
else if ((aP < aEnd) && (aPsym >= aStart))
sum+= ((*aP)+(*aPsym))*(*bP);
else if (aP < aEnd)
sum += (*aP)*(*bP);
else if (aPsym >= aStart)
sum += (*aPsym)*(*bP);
aP--;
aPsym++;
bP++;
}
}
else {
while (bP < bEnd) {
if (aP < aStart) break;
if (aP == aPsym)
sum+= (*aP)*(bP->real());
else if ((aP < aEnd) && (aPsym >= aStart))
sum+= ((*aP)+(*aPsym))*(bP->real());
else if (aP < aEnd)
sum += (*aP)*(bP->real());
else if (aPsym >= aStart)
sum += (*aPsym)*(bP->real());
aP--;
aPsym++;
bP++;
}
}
*cPtr++ = sum;
t++;
}
} }
break; break;
default: default:
return NULL; return NULL;
break;
} }
return c;
}
/*
* Error if the output vector is too small. Create the output vector
* if the pointer is NULL.
*/
if (y && (len > y->size()))
return NULL;
if (!y) {
y = new signalVector(len);
alloc = true;
}
/* Prepend or post-pend the input vector if the parameters require it */
if (append)
_x = new signalVector(*x, head, tail);
else
_x = x;
/*
* Four convolve types:
* 1. Complex-Real (aligned)
* 2. Complex-Complex (aligned)
* 3. Complex-Real (!aligned)
* 4. Complex-Complex (!aligned)
*/
if (h->isRealOnly() && h->isAligned()) {
rc = convolve_real((float *) _x->begin(), _x->size(),
(float *) h->begin(), h->size(),
(float *) y->begin(), y->size(),
start, len, step, offset);
} else if (!h->isRealOnly() && h->isAligned()) {
rc = convolve_complex((float *) _x->begin(), _x->size(),
(float *) h->begin(), h->size(),
(float *) y->begin(), y->size(),
start, len, step, offset);
} else if (h->isRealOnly() && !h->isAligned()) {
rc = base_convolve_real((float *) _x->begin(), _x->size(),
(float *) h->begin(), h->size(),
(float *) y->begin(), y->size(),
start, len, step, offset);
} else if (!h->isRealOnly() && !h->isAligned()) {
rc = base_convolve_complex((float *) _x->begin(), _x->size(),
(float *) h->begin(), h->size(),
(float *) y->begin(), y->size(),
start, len, step, offset);
} else {
rc = -1;
}
if (append)
delete _x;
if (rc < 0) {
if (alloc)
delete y;
return NULL;
}
return y;
}
void generateGSMPulse(int sps, int symbolLength) void generateGSMPulse(int sps, int symbolLength)
{ {
@ -477,9 +422,17 @@ void generateGSMPulse(int sps, int symbolLength)
GSMPulse->empty->isRealOnly(true); GSMPulse->empty->isRealOnly(true);
*(GSMPulse->empty->begin()) = 1.0f; *(GSMPulse->empty->begin()) = 1.0f;
len = sps * symbolLength;
if (len < 4)
len = 4;
/* GSM pulse approximation */ /* GSM pulse approximation */
GSMPulse->gaussian = new signalVector(len); GSMPulse->buffer = convolve_h_alloc(len);
GSMPulse->gaussian = new signalVector((complex *)
GSMPulse->buffer, 0, len);
GSMPulse->gaussian->setAligned(true);
GSMPulse->gaussian->isRealOnly(true); GSMPulse->gaussian->isRealOnly(true);
signalVector::iterator xP = GSMPulse->gaussian->begin(); signalVector::iterator xP = GSMPulse->gaussian->begin();
center = (float) (len - 1.0) / 2.0; center = (float) (len - 1.0) / 2.0;
@ -560,31 +513,6 @@ signalVector* reverseConjugate(signalVector *b)
return tmp; return tmp;
} }
signalVector* correlate(signalVector *a,
signalVector *b,
signalVector *c,
ConvType spanType,
bool bReversedConjugated,
unsigned startIx,
unsigned len)
{
signalVector *tmp = NULL;
if (!bReversedConjugated) {
tmp = reverseConjugate(b);
}
else {
tmp = b;
}
c = convolve(a,tmp,c,spanType,startIx,len);
if (!bReversedConjugated) delete tmp;
return c;
}
/* soft output slicer */ /* soft output slicer */
bool vectorSlicer(signalVector *x) bool vectorSlicer(signalVector *x)
{ {
@ -599,12 +527,13 @@ bool vectorSlicer(signalVector *x)
} }
return true; return true;
} }
/* Assume input bits are not differentially encoded */
signalVector *modulateBurst(const BitVector &wBurst, int guardPeriodLength, signalVector *modulateBurst(const BitVector &wBurst, int guardPeriodLength,
int sps, bool emptyPulse) int sps, bool emptyPulse)
{ {
int burstLen; int burstLen;
signalVector *pulse, modBurst; signalVector *pulse, *shapedBurst, modBurst;
signalVector::iterator modBurstItr; signalVector::iterator modBurstItr;
if (emptyPulse) if (emptyPulse)
@ -628,7 +557,9 @@ signalVector *modulateBurst(const BitVector &wBurst, int guardPeriodLength,
modBurst.isRealOnly(false); modBurst.isRealOnly(false);
// filter w/ pulse shape // filter w/ pulse shape
signalVector *shapedBurst = convolve(&modBurst, pulse, NULL, NO_DELAY); shapedBurst = convolve(&modBurst, pulse, NULL, START_ONLY);
if (!shapedBurst)
return NULL;
return shapedBurst; return shapedBurst;
} }
@ -639,24 +570,24 @@ float sinc(float x)
return 1.0F; return 1.0F;
} }
void delayVector(signalVector &wBurst, bool delayVector(signalVector &wBurst, float delay)
float delay)
{ {
int intOffset = (int) floor(delay); int intOffset = (int) floor(delay);
float fracOffset = delay - intOffset; float fracOffset = delay - intOffset;
// do fractional shift first, only do it for reasonable offsets // do fractional shift first, only do it for reasonable offsets
if (fabs(fracOffset) > 1e-2) { if (fabs(fracOffset) > 1e-2) {
// create sinc function // create sinc function
signalVector sincVector(21); signalVector sincVector(21);
sincVector.isRealOnly(true); sincVector.isRealOnly(true);
signalVector::iterator sincBurstItr = sincVector.begin(); signalVector::iterator sincBurstItr = sincVector.end();
for (int i = 0; i < 21; i++) for (int i = 0; i < 21; i++)
*sincBurstItr++ = (complex) sinc(M_PI_F*(i-10-fracOffset)); *--sincBurstItr = (complex) sinc(M_PI_F*(i-10-fracOffset));
signalVector shiftedBurst(wBurst.size()); signalVector shiftedBurst(wBurst.size());
convolve(&wBurst,&sincVector,&shiftedBurst,NO_DELAY); if (!convolve(&wBurst, &sincVector, &shiftedBurst, NO_DELAY))
return false;
wBurst.clone(shiftedBurst); wBurst.clone(shiftedBurst);
} }
@ -861,25 +792,25 @@ bool generateMidamble(int sps, int tsc)
bool status = true; bool status = true;
complex *data = NULL; complex *data = NULL;
signalVector *autocorr = NULL, *midamble = NULL; signalVector *autocorr = NULL, *midamble = NULL;
signalVector *midMidamble = NULL; signalVector *midMidamble = NULL, *_midMidamble = NULL;
if ((tsc < 0) || (tsc > 7)) if ((tsc < 0) || (tsc > 7))
return false; return false;
delete gMidambles[tsc]; delete gMidambles[tsc];
/* Use middle 16 bits of each TSC. Correlation sequence is not pulse shaped */ /* Use middle 16 bits of each TSC. Correlation sequence is not pulse shaped */
midMidamble = modulateBurst(gTrainingSequence[tsc].segment(5,16), 0, sps, true); midMidamble = modulateBurst(gTrainingSequence[tsc].segment(5,16), 0, sps, true);
if (!midMidamble) if (!midMidamble)
return false; return false;
/* Simulated receive sequence is pulse shaped */ /* Simulated receive sequence is pulse shaped */
midamble = modulateBurst(gTrainingSequence[tsc], 0, sps, false); midamble = modulateBurst(gTrainingSequence[tsc], 0, sps, false);
if (!midamble) { if (!midamble) {
status = false; status = false;
goto release; goto release;
} }
// NOTE: Because ideal TSC 16-bit midamble is 66 symbols into burst, // NOTE: Because ideal TSC 16-bit midamble is 66 symbols into burst,
// the ideal TSC has an + 180 degree phase shift, // the ideal TSC has an + 180 degree phase shift,
// due to the pi/2 frequency shift, that // due to the pi/2 frequency shift, that
@ -890,22 +821,32 @@ bool generateMidamble(int sps, int tsc)
conjugateVector(*midMidamble); conjugateVector(*midMidamble);
autocorr = correlate(midamble, midMidamble, NULL, NO_DELAY); /* For SSE alignment, reallocate the midamble sequence on 16-byte boundary */
data = (complex *) convolve_h_alloc(midMidamble->size());
_midMidamble = new signalVector(data, 0, midMidamble->size());
_midMidamble->setAligned(true);
memcpy(_midMidamble->begin(), midMidamble->begin(),
midMidamble->size() * sizeof(complex));
autocorr = convolve(midamble, _midMidamble, NULL, NO_DELAY);
if (!autocorr) { if (!autocorr) {
status = false; status = false;
goto release; goto release;
} }
gMidambles[tsc] = new CorrelationSequence; gMidambles[tsc] = new CorrelationSequence;
gMidambles[tsc]->sequence = midMidamble; gMidambles[tsc]->buffer = data;
gMidambles[tsc]->gain = peakDetect(*autocorr,&gMidambles[tsc]->TOA,NULL); gMidambles[tsc]->sequence = _midMidamble;
gMidambles[tsc]->gain = peakDetect(*autocorr,&gMidambles[tsc]->TOA, NULL);
release: release:
delete autocorr; delete autocorr;
delete midamble; delete midamble;
delete midMidamble;
if (!status) { if (!status) {
delete midMidamble; delete _midMidamble;
free(data);
gMidambles[tsc] = NULL; gMidambles[tsc] = NULL;
} }
@ -917,7 +858,7 @@ bool generateRACHSequence(int sps)
bool status = true; bool status = true;
complex *data = NULL; complex *data = NULL;
signalVector *autocorr = NULL; signalVector *autocorr = NULL;
signalVector *seq0 = NULL, *seq1 = NULL; signalVector *seq0 = NULL, *seq1 = NULL, *_seq1 = NULL;
delete gRACHSequence; delete gRACHSequence;
@ -933,74 +874,100 @@ bool generateRACHSequence(int sps)
conjugateVector(*seq1); conjugateVector(*seq1);
autocorr = new signalVector(seq0->size()); /* For SSE alignment, reallocate the midamble sequence on 16-byte boundary */
if (!convolve(seq0, seq1, autocorr, NO_DELAY)) { data = (complex *) convolve_h_alloc(seq1->size());
_seq1 = new signalVector(data, 0, seq1->size());
_seq1->setAligned(true);
memcpy(_seq1->begin(), seq1->begin(), seq1->size() * sizeof(complex));
autocorr = convolve(seq0, _seq1, autocorr, NO_DELAY);
if (!autocorr) {
status = false; status = false;
goto release; goto release;
} }
gRACHSequence = new CorrelationSequence; gRACHSequence = new CorrelationSequence;
gRACHSequence->sequence = seq1; gRACHSequence->sequence = _seq1;
gRACHSequence->gain = peakDetect(*autocorr,&gRACHSequence->TOA,NULL); gRACHSequence->buffer = data;
gRACHSequence->gain = peakDetect(*autocorr,&gRACHSequence->TOA, NULL);
release: release:
delete autocorr; delete autocorr;
delete seq0; delete seq0;
delete seq1;
if (!status) { if (!status) {
delete seq1; delete _seq1;
free(data);
gRACHSequence = NULL; gRACHSequence = NULL;
} }
return status; return status;
} }
bool detectRACHBurst(signalVector &rxBurst, int detectRACHBurst(signalVector &rxBurst,
float detectThreshold, float thresh,
int sps, int sps,
complex *amplitude, complex *amp,
float* TOA) float *toa)
{ {
int start, len, num = 0;
float _toa, rms, par, avg = 0.0f;
complex _amp, *peak;
signalVector corr, *sync = gRACHSequence->sequence;
//static complex staticData[500]; if ((sps != 1) && (sps != 2) && (sps != 4))
return -1;
//signalVector correlatedRACH(staticData,0,rxBurst.size());
signalVector correlatedRACH(rxBurst.size());
correlate(&rxBurst,gRACHSequence->sequence,&correlatedRACH,NO_DELAY,true);
float meanPower; start = 40 * sps;
complex peakAmpl = peakDetect(correlatedRACH,TOA,&meanPower); len = 24 * sps;
corr = signalVector(len);
float valleyPower = 0.0; if (!convolve(&rxBurst, sync, &corr,
CUSTOM, start, len, sps, 0)) {
// check for bogus results return -1;
if ((*TOA < 0.0) || (*TOA > correlatedRACH.size())) {
*amplitude = 0.0;
return false;
}
complex *peakPtr = correlatedRACH.begin() + (int) rint(*TOA);
float numSamples = 0.0;
for (int i = 57 * sps; i <= 107 * sps; i++) {
if (peakPtr+i >= correlatedRACH.end())
break;
valleyPower += (peakPtr+i)->norm2();
numSamples++;
} }
if (numSamples < 2) { _amp = peakDetect(corr, &_toa, NULL);
*amplitude = 0.0; if ((_toa < 3) || (_toa > len - 3))
return false; goto notfound;
peak = corr.begin() + (int) rint(_toa);
for (int i = 2 * sps; i <= 5 * sps; i++) {
if (peak - i >= corr.begin()) {
avg += (peak - i)->norm2();
num++;
}
if (peak + i < corr.end()) {
avg += (peak + i)->norm2();
num++;
}
} }
float RMS = sqrtf(valleyPower/(float) numSamples)+0.00001; if (num < 2)
float peakToMean = peakAmpl.abs()/RMS; goto notfound;
*amplitude = peakAmpl/(gRACHSequence->gain); rms = sqrtf(avg / (float) num) + 0.00001;
par = _amp.abs() / rms;
if (par < thresh)
goto notfound;
*TOA = (*TOA) - gRACHSequence->TOA - 8 * sps; /* Subtract forward tail bits from delay */
if (toa)
*toa = _toa - 8 * sps;
if (amp)
*amp = _amp / gRACHSequence->gain;
return (peakToMean > detectThreshold); return 1;
notfound:
if (amp)
*amp = 0.0f;
if (toa)
*toa = 0.0f;
return 0;
} }
bool energyDetect(signalVector &rxBurst, bool energyDetect(signalVector &rxBurst,
@ -1020,120 +987,95 @@ bool energyDetect(signalVector &rxBurst,
if (avgPwr) *avgPwr = energy/windowLength; if (avgPwr) *avgPwr = energy/windowLength;
return (energy/windowLength > detectThreshold*detectThreshold); return (energy/windowLength > detectThreshold*detectThreshold);
} }
bool analyzeTrafficBurst(signalVector &rxBurst, int analyzeTrafficBurst(signalVector &rxBurst, unsigned tsc, float thresh,
unsigned TSC, int sps, complex *amp, float *toa, unsigned max_toa,
float detectThreshold, bool chan_req, signalVector **chan, float *chan_offset)
int sps,
complex *amplitude,
float *TOA,
unsigned maxTOA,
bool requestChannel,
signalVector **channelResponse,
float *channelResponseOffset)
{ {
int start, target, len, num = 0;
complex _amp, *peak;
float _toa, rms, par, avg = 0.0f;
signalVector corr, *sync, *_chan;
assert(TSC<8); if ((tsc < 0) || (tsc > 7) || ((sps != 1) && (sps != 2) && (sps != 4)))
assert(amplitude); return -1;
assert(TOA);
assert(gMidambles[TSC]);
if (maxTOA < 3*sps) maxTOA = 3*sps; target = 3 + 58 + 5 + 16;
unsigned spanTOA = maxTOA; start = (target - 8) * sps;
if (spanTOA < 5*sps) spanTOA = 5*sps; len = (8 + 8 + max_toa) * sps;
unsigned startIx = 66*sps-spanTOA; sync = gMidambles[tsc]->sequence;
unsigned endIx = (66+16)*sps+spanTOA; sync = gMidambles[tsc]->sequence;
unsigned windowLen = endIx - startIx; corr = signalVector(len);
unsigned corrLen = 2*maxTOA+1;
unsigned expectedTOAPeak = (unsigned) round(gMidambles[TSC]->TOA + (gMidambles[TSC]->sequence->size()-1)/2); if (!convolve(&rxBurst, sync, &corr,
CUSTOM, start, len, sps, 0)) {
signalVector burstSegment(rxBurst.begin(),startIx,windowLen); return -1;
//static complex staticData[200];
//signalVector correlatedBurst(staticData,0,corrLen);
signalVector correlatedBurst(corrLen);
correlate(&burstSegment, gMidambles[TSC]->sequence,
&correlatedBurst, CUSTOM,true,
expectedTOAPeak-maxTOA,corrLen);
float meanPower;
*amplitude = peakDetect(correlatedBurst,TOA,&meanPower);
float valleyPower = 0.0; //amplitude->norm2();
complex *peakPtr = correlatedBurst.begin() + (int) rint(*TOA);
// check for bogus results
if ((*TOA < 0.0) || (*TOA > correlatedBurst.size())) {
*amplitude = 0.0;
return false;
} }
int numRms = 0; _amp = peakDetect(corr, &_toa, NULL);
for (int i = 2*sps; i <= 5*sps;i++) { peak = corr.begin() + (int) rint(_toa);
if (peakPtr - i >= correlatedBurst.begin()) {
valleyPower += (peakPtr-i)->norm2(); /* Check for bogus results */
numRms++; if ((_toa < 0.0) || (_toa > corr.size()))
goto notfound;
for (int i = 2 * sps; i <= 5 * sps; i++) {
if (peak - i >= corr.begin()) {
avg += (peak - i)->norm2();
num++;
} }
if (peakPtr + i < correlatedBurst.end()) { if (peak + i < corr.end()) {
valleyPower += (peakPtr+i)->norm2(); avg += (peak + i)->norm2();
numRms++; num++;
} }
} }
if (numRms < 2) { if (num < 2)
// check for bogus results goto notfound;
*amplitude = 0.0;
return false; rms = sqrtf(avg / (float) num) + 0.00001;
par = (_amp.abs()) / rms;
if (par < thresh)
goto notfound;
/*
* NOTE: Because ideal TSC is 66 symbols into burst,
* the ideal TSC has an +/- 180 degree phase shift,
* due to the pi/4 frequency shift, that
* needs to be accounted for.
*/
if (amp)
*amp = _amp / gMidambles[tsc]->gain;
/* Delay one half of peak-centred correlation length */
_toa -= sps * 8;
if (toa)
*toa = _toa;
if (chan_req) {
_chan = new signalVector(6 * sps);
delayVector(corr, -_toa);
corr.segmentCopyTo(*_chan, target - 3, _chan->size());
scaleVector(*_chan, complex(1.0, 0.0) / gMidambles[tsc]->gain);
*chan = _chan;
if (chan_offset)
*chan_offset = 3.0 * sps;;
} }
float RMS = sqrtf(valleyPower/(float)numRms)+0.00001; return 1;
float peakToMean = (amplitude->abs())/RMS;
// NOTE: Because ideal TSC is 66 symbols into burst, notfound:
// the ideal TSC has an +/- 180 degree phase shift, if (amp)
// due to the pi/4 frequency shift, that *amp = 0.0f;
// needs to be accounted for. if (toa)
*toa = 0.0f;
*amplitude = (*amplitude)/gMidambles[TSC]->gain;
*TOA = (*TOA) - (maxTOA);
if (requestChannel && (peakToMean > detectThreshold)) { return 0;
float TOAoffset = maxTOA;
delayVector(correlatedBurst,-(*TOA));
// midamble only allows estimation of a 6-tap channel
signalVector chanVector(6 * sps);
float maxEnergy = -1.0;
int maxI = -1;
for (int i = 0; i < 7; i++) {
if (TOAoffset + (i-5) * sps + chanVector.size() > correlatedBurst.size())
continue;
if (TOAoffset + (i-5) * sps < 0)
continue;
correlatedBurst.segmentCopyTo(chanVector,
(int) floor(TOAoffset + (i - 5) * sps),
chanVector.size());
float energy = vectorNorm2(chanVector);
if (energy > 0.95*maxEnergy) {
maxI = i;
maxEnergy = energy;
}
}
*channelResponse = new signalVector(chanVector.size());
correlatedBurst.segmentCopyTo(**channelResponse,
(int) floor(TOAoffset + (maxI - 5) * sps),
(*channelResponse)->size());
scaleVector(**channelResponse, complex(1.0, 0.0) / gMidambles[TSC]->gain);
if (channelResponseOffset)
*channelResponseOffset = 5 * sps - maxI;
}
return (peakToMean > detectThreshold);
} }
signalVector *decimateVector(signalVector &wVector, signalVector *decimateVector(signalVector &wVector,
@ -1452,7 +1394,7 @@ bool designDFE(signalVector &channelResponse,
} }
*feedForwardFilter = new signalVector(Nf); *feedForwardFilter = new signalVector(Nf);
signalVector::iterator w = (*feedForwardFilter)->begin(); signalVector::iterator w = (*feedForwardFilter)->end();
for (int i = 0; i < Nf; i++) { for (int i = 0; i < Nf; i++) {
delete L[i]; delete L[i];
complex w_i = 0.0; complex w_i = 0.0;
@ -1463,8 +1405,7 @@ bool designDFE(signalVector &channelResponse,
w_i += (*vPtr)*(chanPtr->conj()); w_i += (*vPtr)*(chanPtr->conj());
vPtr++; chanPtr++; vPtr++; chanPtr++;
} }
*w = w_i/d; *--w = w_i/d;
w++;
} }
@ -1479,10 +1420,15 @@ SoftVector *equalizeBurst(signalVector &rxBurst,
signalVector &w, // feedforward filter signalVector &w, // feedforward filter
signalVector &b) // feedback filter signalVector &b) // feedback filter
{ {
signalVector *postForwardFull;
delayVector(rxBurst,-TOA); if (!delayVector(rxBurst, -TOA))
return NULL;
signalVector* postForwardFull = convolve(&rxBurst,&w,NULL,FULL_SPAN); postForwardFull = convolve(&rxBurst, &w, NULL,
CUSTOM, 0, rxBurst.size() + w.size() - 1);
if (!postForwardFull)
return NULL;
signalVector* postForward = new signalVector(rxBurst.size()); signalVector* postForward = new signalVector(rxBurst.size());
postForwardFull->segmentCopyTo(*postForward,w.size()-1,rxBurst.size()); postForwardFull->segmentCopyTo(*postForward,w.size()-1,rxBurst.size());

View File

@ -27,13 +27,10 @@ enum Symmetry {
/** Convolution type indicator */ /** Convolution type indicator */
enum ConvType { enum ConvType {
FULL_SPAN = 0, START_ONLY,
OVERLAP_ONLY = 1, NO_DELAY,
START_ONLY = 2, CUSTOM,
WITH_TAIL = 3, UNDEFINED,
NO_DELAY = 4,
CUSTOM = 5,
UNDEFINED = 255
}; };
/** the core data structure of the Transceiver */ /** the core data structure of the Transceiver */
@ -44,13 +41,14 @@ class signalVector: public Vector<complex>
Symmetry symmetry; ///< the symmetry of the vector Symmetry symmetry; ///< the symmetry of the vector
bool realOnly; ///< true if vector is real-valued, not complex-valued bool realOnly; ///< true if vector is real-valued, not complex-valued
bool aligned;
public: public:
/** Constructors */ /** Constructors */
signalVector(int dSize=0, Symmetry wSymmetry = NONE): signalVector(int dSize=0, Symmetry wSymmetry = NONE):
Vector<complex>(dSize), Vector<complex>(dSize),
realOnly(false) realOnly(false), aligned(false)
{ {
symmetry = wSymmetry; symmetry = wSymmetry;
}; };
@ -58,26 +56,45 @@ class signalVector: public Vector<complex>
signalVector(complex* wData, size_t start, signalVector(complex* wData, size_t start,
size_t span, Symmetry wSymmetry = NONE): size_t span, Symmetry wSymmetry = NONE):
Vector<complex>(NULL,wData+start,wData+start+span), Vector<complex>(NULL,wData+start,wData+start+span),
realOnly(false) realOnly(false), aligned(false)
{ {
symmetry = wSymmetry; symmetry = wSymmetry;
}; };
signalVector(const signalVector &vec1, const signalVector &vec2): signalVector(const signalVector &vec1, const signalVector &vec2):
Vector<complex>(vec1,vec2), Vector<complex>(vec1,vec2),
realOnly(false) realOnly(false), aligned(false)
{ {
symmetry = vec1.symmetry; symmetry = vec1.symmetry;
}; };
signalVector(const signalVector &wVector): signalVector(const signalVector &wVector):
Vector<complex>(wVector.size()), Vector<complex>(wVector.size()),
realOnly(false) realOnly(false), aligned(false)
{ {
wVector.copyTo(*this); wVector.copyTo(*this);
symmetry = wVector.getSymmetry(); symmetry = wVector.getSymmetry();
}; };
signalVector(size_t size, size_t start):
Vector<complex>(size + start),
realOnly(false), aligned(false)
{
mStart = mData + start;
symmetry = NONE;
};
signalVector(const signalVector &wVector, size_t start, size_t tail = 0):
Vector<complex>(start + wVector.size() + tail),
realOnly(false), aligned(false)
{
mStart = mData + start;
wVector.copyTo(*this);
memset(mData, 0, start * sizeof(complex));
memset(mStart + wVector.size(), 0, tail * sizeof(complex));
symmetry = NONE;
};
/** symmetry operators */ /** symmetry operators */
Symmetry getSymmetry() const { return symmetry;}; Symmetry getSymmetry() const { return symmetry;};
void setSymmetry(Symmetry wSymmetry) { symmetry = wSymmetry;}; void setSymmetry(Symmetry wSymmetry) { symmetry = wSymmetry;};
@ -85,6 +102,10 @@ class signalVector: public Vector<complex>
/** real-valued operators */ /** real-valued operators */
bool isRealOnly() const { return realOnly;}; bool isRealOnly() const { return realOnly;};
void isRealOnly(bool wOnly) { realOnly = wOnly;}; void isRealOnly(bool wOnly) { realOnly = wOnly;};
/** alignment markers */
bool isAligned() const { return aligned; };
void setAligned(bool aligned) { this->aligned = aligned; };
}; };
/** Convert a linear number to a dB value */ /** Convert a linear number to a dB value */
@ -110,14 +131,15 @@ void sigProcLibDestroy(void);
@param a,b The vectors to be convolved. @param a,b The vectors to be convolved.
@param c, A preallocated vector to hold the convolution result. @param c, A preallocated vector to hold the convolution result.
@param spanType The type/span of the convolution. @param spanType The type/span of the convolution.
@return The convolution result. @return The convolution result or NULL on error.
*/ */
signalVector* convolve(const signalVector *a, signalVector *convolve(const signalVector *a,
const signalVector *b, const signalVector *b,
signalVector *c, signalVector *c,
ConvType spanType, ConvType spanType,
unsigned startIx = 0, int start = 0,
unsigned len = 0); unsigned len = 0,
unsigned step = 1, int offset = 0);
/** /**
Generate the GSM pulse. Generate the GSM pulse.
@ -169,8 +191,7 @@ signalVector *modulateBurst(const BitVector &wBurst,
float sinc(float x); float sinc(float x);
/** Delay a vector */ /** Delay a vector */
void delayVector(signalVector &wBurst, bool delayVector(signalVector &wBurst, float delay);
float delay);
/** Add two vectors in-place */ /** Add two vectors in-place */
bool addVector(signalVector &x, bool addVector(signalVector &x,
@ -257,13 +278,13 @@ bool energyDetect(signalVector &rxBurst,
@param sps The number of samples per GSM symbol. @param sps The number of samples per GSM symbol.
@param amplitude The estimated amplitude of received RACH burst. @param amplitude The estimated amplitude of received RACH burst.
@param TOA The estimate time-of-arrival of received RACH burst. @param TOA The estimate time-of-arrival of received RACH burst.
@return True if burst SNR is larger that the detectThreshold value. @return positive if threshold value is reached, negative on error, zero otherwise
*/ */
bool detectRACHBurst(signalVector &rxBurst, int detectRACHBurst(signalVector &rxBurst,
float detectThreshold, float detectThreshold,
int sps, int sps,
complex *amplitude, complex *amplitude,
float* TOA); float* TOA);
/** /**
Normal burst correlator, detector, channel estimator. Normal burst correlator, detector, channel estimator.
@ -277,18 +298,18 @@ bool detectRACHBurst(signalVector &rxBurst,
@param requestChannel Set to true if channel estimation is desired. @param requestChannel Set to true if channel estimation is desired.
@param channelResponse The estimated channel. @param channelResponse The estimated channel.
@param channelResponseOffset The time offset b/w the first sample of the channel response and the reported TOA. @param channelResponseOffset The time offset b/w the first sample of the channel response and the reported TOA.
@return True if burst SNR is larger that the detectThreshold value. @return positive if threshold value is reached, negative on error, zero otherwise
*/ */
bool analyzeTrafficBurst(signalVector &rxBurst, int analyzeTrafficBurst(signalVector &rxBurst,
unsigned TSC, unsigned TSC,
float detectThreshold, float detectThreshold,
int sps, int sps,
complex *amplitude, complex *amplitude,
float *TOA, float *TOA,
unsigned maxTOA, unsigned maxTOA,
bool requestChannel = false, bool requestChannel = false,
signalVector** channelResponse = NULL, signalVector** channelResponse = NULL,
float *channelResponseOffset = NULL); float *channelResponseOffset = NULL);
/** /**
Decimate a vector. Decimate a vector.

View File

@ -50,9 +50,6 @@ int main(int argc, char **argv) {
sigProcLibSetup(samplesPerSymbol); sigProcLibSetup(samplesPerSymbol);
signalVector *gsmPulse = generateGSMPulse(2,samplesPerSymbol);
cout << *gsmPulse << endl;
BitVector RACHBurstStart = "01010101"; BitVector RACHBurstStart = "01010101";
BitVector RACHBurstRest = "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"; BitVector RACHBurstRest = "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000";
@ -60,12 +57,9 @@ int main(int argc, char **argv) {
signalVector *RACHSeq = modulateBurst(RACHBurst, signalVector *RACHSeq = modulateBurst(RACHBurst,
*gsmPulse,
9, 9,
samplesPerSymbol); samplesPerSymbol);
generateRACHSequence(*gsmPulse,samplesPerSymbol);
complex a; float t; complex a; float t;
detectRACHBurst(*RACHSeq, 5, samplesPerSymbol,&a,&t); detectRACHBurst(*RACHSeq, 5, samplesPerSymbol,&a,&t);
@ -94,12 +88,9 @@ int main(int argc, char **argv) {
BitVector normalBurst(BitVector(normalBurstSeg,gTrainingSequence[TSC]),normalBurstSeg); BitVector normalBurst(BitVector(normalBurstSeg,gTrainingSequence[TSC]),normalBurstSeg);
generateMidamble(samplesPerSymbol,TSC);
generateMidamble(*gsmPulse,samplesPerSymbol,TSC); signalVector *modBurst = modulateBurst(normalBurst,0,samplesPerSymbol);
signalVector *modBurst = modulateBurst(normalBurst,*gsmPulse,
0,samplesPerSymbol);
//delayVector(*rsVector2,6.932); //delayVector(*rsVector2,6.932);
@ -133,7 +124,7 @@ int main(int argc, char **argv) {
cout << "ampl:" << ampl << endl; cout << "ampl:" << ampl << endl;
cout << "TOA: " << TOA << endl; cout << "TOA: " << TOA << endl;
//cout << "chanResp: " << *chanResp << endl; //cout << "chanResp: " << *chanResp << endl;
SoftVector *demodBurst = demodulateBurst(*modBurst,*gsmPulse,samplesPerSymbol,(complex) ampl, TOA); SoftVector *demodBurst = demodulateBurst(*modBurst,samplesPerSymbol,(complex) ampl, TOA);
cout << *demodBurst << endl; cout << *demodBurst << endl;

View File

@ -0,0 +1,72 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS])
#
# DESCRIPTION
#
# Check whether the given FLAG works with the current language's compiler
# or gives an error. (Warnings, however, are ignored)
#
# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on
# success/failure.
#
# If EXTRA-FLAGS is defined, it is added to the current language's default
# flags (e.g. CFLAGS) when the check is done. The check is thus made with
# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to
# force the compiler to issue an error when a bad flag is given.
#
# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this
# macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG.
#
# LICENSE
#
# Copyright (c) 2008 Guido U. Draheim <guidod@gmx.de>
# Copyright (c) 2011 Maarten Bosmans <mkbosmans@gmail.com>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 2
# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS])
# ------------------------------------------------------------------------------
# Test whether the compiler for the current language accepts FLAG.
#
# The current language's FLAGS variable is saved, EXTRA-FLAGS and FLAG are
# appended to it, an empty program is compiled, and the saved value is
# restored. The yes/no outcome is stored in a cache variable whose name is
# built from the language abbreviation, EXTRA-FLAGS and FLAG, so each
# distinct combination is cached separately.
#
# ACTION-SUCCESS runs when the cached outcome is yes, ACTION-FAILURE
# otherwise; an omitted action defaults to the shell no-op ":".
AC_DEFUN([AX_CHECK_COMPILE_FLAG],
[AC_PREREQ(2.59)dnl for _AC_LANG_PREFIX
dnl CACHEVAR is a temporary alias for the per-flag cache variable name.
AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl
AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
dnl Save the language FLAGS variable so the probe leaves it unchanged.
ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
_AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
dnl Compile an empty program with the candidate flags and record yes or no.
AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],
[AS_VAR_SET(CACHEVAR,[yes])],
[AS_VAR_SET(CACHEVAR,[no])])
_AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])
dnl Dispatch to the caller supplied actions based on the cached outcome.
AS_IF([test x"AS_VAR_GET(CACHEVAR)" = xyes],
[m4_default([$2], :)],
[m4_default([$3], :)])
AS_VAR_POPDEF([CACHEVAR])dnl
])dnl AX_CHECK_COMPILE_FLAG

221
config/ax_ext.m4 Normal file
View File

@ -0,0 +1,221 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_ext.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_EXT
#
# DESCRIPTION
#
# Find supported SIMD extensions by requesting cpuid. When an SIMD
# extension is found, the -m"simdextensionname" is added to SIMD_FLAGS if
# compiler supports it. For example, if "sse2" is available, then "-msse2"
# is added to SIMD_FLAGS.
#
# This macro calls:
#
# AC_SUBST(SIMD_FLAGS)
#
# And defines:
#
# HAVE_MMX / HAVE_SSE / HAVE_SSE2 / HAVE_SSE3 / HAVE_SSSE3 / HAVE_SSE4_1 / HAVE_SSE4_2 / HAVE_AVX
#
# LICENSE
#
# Copyright (c) 2007 Christophe Tournayre <turn3r@users.sourceforge.net>
# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 12
# AX_EXT
# ------
# Detect the SIMD extensions of the build machine's x86 CPU and export the
# matching compiler switches.
#
# On i386..i686, x86_64 and amd64 hosts the macro reads CPUID leaf 1 via
# AX_GCC_X86_CPUID and tests the feature bits for MMX, SSE, SSE2, SSE3,
# SSSE3, SSE4.1, SSE4.2 and AVX. AVX additionally requires the OSXSAVE bit
# and XGETBV reporting that the OS saves both XMM and YMM state. For every
# extension found, the corresponding -m switch is probed with
# AX_CHECK_COMPILE_FLAG; when accepted it is appended to SIMD_FLAGS and the
# matching HAVE_ symbol is defined. On other hosts nothing is probed.
#
# SIMD_FLAGS is always substituted with AC_SUBST.
#
# When the CPUID probe cannot run and reports "unknown", all feature bits
# are treated as clear, so no extension is reported as available.
AC_DEFUN([AX_EXT],
[
  AC_REQUIRE([AC_CANONICAL_HOST])

  case $host_cpu in
    i[[3456]]86*|x86_64*|amd64*)

      AC_REQUIRE([AX_GCC_X86_CPUID])
      AC_REQUIRE([AX_GCC_X86_AVX_XGETBV])

      AX_GCC_X86_CPUID(0x00000001)
dnl The cache value has the form eax:ebx:ecx:edx or is the word unknown.
dnl Default both registers to zero and only split a real result, so that
dnl the shell arithmetic below never sees a non hexadecimal operand.
      ecx=0
      edx=0
      if test x"$ax_cv_gcc_x86_cpuid_0x00000001" != x"unknown"; then
        ecx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3`
        edx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4`
      fi

dnl Feature bits in edx: 23 is MMX, 25 is SSE, 26 is SSE2.
      AC_CACHE_CHECK([whether mmx is supported], [ax_cv_have_mmx_ext],
      [
        ax_cv_have_mmx_ext=no
        if test "$((0x$edx>>23&0x01))" = 1; then
          ax_cv_have_mmx_ext=yes
        fi
      ])

      AC_CACHE_CHECK([whether sse is supported], [ax_cv_have_sse_ext],
      [
        ax_cv_have_sse_ext=no
        if test "$((0x$edx>>25&0x01))" = 1; then
          ax_cv_have_sse_ext=yes
        fi
      ])

      AC_CACHE_CHECK([whether sse2 is supported], [ax_cv_have_sse2_ext],
      [
        ax_cv_have_sse2_ext=no
        if test "$((0x$edx>>26&0x01))" = 1; then
          ax_cv_have_sse2_ext=yes
        fi
      ])

dnl Feature bits in ecx: 0 is SSE3, 9 is SSSE3, 19 is SSE4.1, 20 is SSE4.2,
dnl 27 is OSXSAVE and 28 is AVX.
      AC_CACHE_CHECK([whether sse3 is supported], [ax_cv_have_sse3_ext],
      [
        ax_cv_have_sse3_ext=no
        if test "$((0x$ecx&0x01))" = 1; then
          ax_cv_have_sse3_ext=yes
        fi
      ])

      AC_CACHE_CHECK([whether ssse3 is supported], [ax_cv_have_ssse3_ext],
      [
        ax_cv_have_ssse3_ext=no
        if test "$((0x$ecx>>9&0x01))" = 1; then
          ax_cv_have_ssse3_ext=yes
        fi
      ])

      AC_CACHE_CHECK([whether sse4.1 is supported], [ax_cv_have_sse41_ext],
      [
        ax_cv_have_sse41_ext=no
        if test "$((0x$ecx>>19&0x01))" = 1; then
          ax_cv_have_sse41_ext=yes
        fi
      ])

      AC_CACHE_CHECK([whether sse4.2 is supported], [ax_cv_have_sse42_ext],
      [
        ax_cv_have_sse42_ext=no
        if test "$((0x$ecx>>20&0x01))" = 1; then
          ax_cv_have_sse42_ext=yes
        fi
      ])

      AC_CACHE_CHECK([whether avx is supported by processor], [ax_cv_have_avx_cpu_ext],
      [
        ax_cv_have_avx_cpu_ext=no
        if test "$((0x$ecx>>28&0x01))" = 1; then
          ax_cv_have_avx_cpu_ext=yes
        fi
      ])

dnl AVX is only usable when the OS saves the extended register state.
      if test x"$ax_cv_have_avx_cpu_ext" = x"yes"; then
        AX_GCC_X86_AVX_XGETBV(0x00000000)

        xgetbv_eax="0"
        if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then
          xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1`
        fi

        AC_CACHE_CHECK([whether avx is supported by operating system], [ax_cv_have_avx_ext],
        [
          ax_cv_have_avx_ext=no

          if test "$((0x$ecx>>27&0x01))" = 1; then
            if test "$((0x$xgetbv_eax&0x6))" = 6; then
              ax_cv_have_avx_ext=yes
            fi
          fi
        ])
        if test x"$ax_cv_have_avx_ext" = x"no"; then
          AC_MSG_WARN([Your processor supports AVX, but your operating system doesn't])
        fi
      fi

dnl For each extension the CPU offers, confirm the compiler accepts the
dnl switch before adding it to SIMD_FLAGS and defining the HAVE_ symbol.
      if test "$ax_cv_have_mmx_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-mmmx, ax_cv_support_mmx_ext=yes, [])
        if test x"$ax_cv_support_mmx_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -mmmx"
          AC_DEFINE(HAVE_MMX,,[Support mmx instructions])
        else
          AC_MSG_WARN([Your processor supports mmx instructions but not your compiler, can you try another compiler?])
        fi
      fi

      if test "$ax_cv_have_sse_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-msse, ax_cv_support_sse_ext=yes, [])
        if test x"$ax_cv_support_sse_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -msse"
          AC_DEFINE(HAVE_SSE,,[Support SSE (Streaming SIMD Extensions) instructions])
        else
          AC_MSG_WARN([Your processor supports sse instructions but not your compiler, can you try another compiler?])
        fi
      fi

      if test "$ax_cv_have_sse2_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-msse2, ax_cv_support_sse2_ext=yes, [])
        if test x"$ax_cv_support_sse2_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -msse2"
          AC_DEFINE(HAVE_SSE2,,[Support SSE2 (Streaming SIMD Extensions 2) instructions])
        else
          AC_MSG_WARN([Your processor supports sse2 instructions but not your compiler, can you try another compiler?])
        fi
      fi

      if test "$ax_cv_have_sse3_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-msse3, ax_cv_support_sse3_ext=yes, [])
        if test x"$ax_cv_support_sse3_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -msse3"
          AC_DEFINE(HAVE_SSE3,,[Support SSE3 (Streaming SIMD Extensions 3) instructions])
        else
          AC_MSG_WARN([Your processor supports sse3 instructions but not your compiler, can you try another compiler?])
        fi
      fi

      if test "$ax_cv_have_ssse3_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-mssse3, ax_cv_support_ssse3_ext=yes, [])
        if test x"$ax_cv_support_ssse3_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -mssse3"
          AC_DEFINE(HAVE_SSSE3,,[Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions])
        else
          AC_MSG_WARN([Your processor supports ssse3 instructions but not your compiler, can you try another compiler?])
        fi
      fi

      if test "$ax_cv_have_sse41_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-msse4.1, ax_cv_support_sse41_ext=yes, [])
        if test x"$ax_cv_support_sse41_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -msse4.1"
          AC_DEFINE(HAVE_SSE4_1,,[Support SSE4.1 (Streaming SIMD Extensions 4.1) instructions])
        else
          AC_MSG_WARN([Your processor supports sse4.1 instructions but not your compiler, can you try another compiler?])
        fi
      fi

      if test "$ax_cv_have_sse42_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-msse4.2, ax_cv_support_sse42_ext=yes, [])
        if test x"$ax_cv_support_sse42_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -msse4.2"
          AC_DEFINE(HAVE_SSE4_2,,[Support SSE4.2 (Streaming SIMD Extensions 4.2) instructions])
        else
          AC_MSG_WARN([Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?])
        fi
      fi

      if test "$ax_cv_have_avx_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-mavx, ax_cv_support_avx_ext=yes, [])
        if test x"$ax_cv_support_avx_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -mavx"
          AC_DEFINE(HAVE_AVX,,[Support AVX (Advanced Vector Extensions) instructions])
        else
          AC_MSG_WARN([Your processor supports avx instructions but not your compiler, can you try another compiler?])
        fi
      fi

  ;;
  esac

  AC_SUBST(SIMD_FLAGS)
])

View File

@ -0,0 +1,79 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_avx_xgetbv.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_GCC_X86_AVX_XGETBV(OP)
#
# DESCRIPTION
#
# On later x86 processors with AVX SIMD support, with gcc or a compiler
# that has a compatible syntax for inline assembly instructions, run a
# small program that executes the xgetbv instruction with input OP. This
# can be used to detect if the OS supports AVX instruction usage.
#
# On output, the values of the eax and edx registers are stored as
# hexadecimal strings as "eax:edx" in the cache variable
# ax_cv_gcc_x86_avx_xgetbv_OP.
#
# If the xgetbv instruction fails (because you are running a
# cross-compiler, or because you are not using gcc, or because you are on
# a processor that doesn't have this instruction),
# ax_cv_gcc_x86_avx_xgetbv_OP is set to the string "unknown".
#
# This macro mainly exists to be used in AX_EXT.
#
# LICENSE
#
# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 1
# AX_GCC_X86_AVX_XGETBV(OP)
# Builds and runs a small C program that executes xgetbv with OP in ecx.
# The result is cached in ax_cv_gcc_x86_avx_xgetbv_OP as "eax:edx" in
# hexadecimal, or as "unknown" when the program cannot be run.
AC_DEFUN([AX_GCC_X86_AVX_XGETBV],
[AC_REQUIRE([AC_PROG_CC])
AC_LANG_PUSH([C])
AC_CACHE_CHECK([for x86-AVX xgetbv $1 output],
  [ax_cv_gcc_x86_avx_xgetbv_$1],
  [AC_RUN_IFELSE(
    [AC_LANG_PROGRAM([#include <stdio.h>], [
      int selector = $1;
      int low_word;
      int high_word;
      FILE *result_file;

      /* xgetbv written as raw opcode bytes; selector goes in ecx,
         the result comes back in eax and edx. */
      __asm__(".byte 0x0f, 0x01, 0xd0"
              : "=a" (low_word), "=d" (high_word)
              : "c" (selector));

      /* Hand the register values back to configure through a scratch file. */
      result_file = fopen("conftest_xgetbv", "w");
      if (result_file == NULL)
        return 1;
      fprintf(result_file, "%x:%x\n", low_word, high_word);
      fclose(result_file);
      return 0;
    ])],
    [ax_cv_gcc_x86_avx_xgetbv_$1=`cat conftest_xgetbv`
     rm -f conftest_xgetbv],
    [ax_cv_gcc_x86_avx_xgetbv_$1=unknown
     rm -f conftest_xgetbv],
    [ax_cv_gcc_x86_avx_xgetbv_$1=unknown])])
AC_LANG_POP([C])
])

View File

@ -0,0 +1,79 @@
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_cpuid.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_GCC_X86_CPUID(OP)
#
# DESCRIPTION
#
# On Pentium and later x86 processors, with gcc or a compiler that has a
# compatible syntax for inline assembly instructions, run a small program
# that executes the cpuid instruction with input OP. This can be used to
# detect the CPU type.
#
# On output, the values of the eax, ebx, ecx, and edx registers are stored
# as hexadecimal strings as "eax:ebx:ecx:edx" in the cache variable
# ax_cv_gcc_x86_cpuid_OP.
#
# If the cpuid instruction fails (because you are running a
# cross-compiler, or because you are not using gcc, or because you are on
# a processor that doesn't have this instruction), ax_cv_gcc_x86_cpuid_OP
# is set to the string "unknown".
#
# This macro mainly exists to be used in AX_GCC_ARCHFLAG.
#
# LICENSE
#
# Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu>
# Copyright (c) 2008 Matteo Frigo
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 7
# AX_GCC_X86_CPUID(OP)
# Builds and runs a small C program that executes cpuid with OP in eax and
# 0 in ecx. The result is cached in ax_cv_gcc_x86_cpuid_OP as
# "eax:ebx:ecx:edx" in hexadecimal, or as "unknown" when the program cannot
# be run (including when cross-compiling).
AC_DEFUN([AX_GCC_X86_CPUID],
[AC_REQUIRE([AC_PROG_CC])
AC_LANG_PUSH([C])
AC_CACHE_CHECK(for x86 cpuid $1 output, ax_cv_gcc_x86_cpuid_$1,
[AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
int op = $1, eax, ebx, ecx, edx;
FILE *f;
/* Load 0 into ecx as well as OP into eax: leaves that take a sub-leaf
   index read it from ecx, and leaving it unset makes their output depend
   on whatever value the register happened to hold. Leaves that ignore
   ecx are unaffected. */
__asm__("cpuid"
: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
: "a" (op), "c" (0));
/* Hand the register values back to configure through a scratch file. */
f = fopen("conftest_cpuid", "w"); if (!f) return 1;
fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx);
fclose(f);
return 0;
])],
[ax_cv_gcc_x86_cpuid_$1=`cat conftest_cpuid`; rm -f conftest_cpuid],
[ax_cv_gcc_x86_cpuid_$1=unknown; rm -f conftest_cpuid],
[ax_cv_gcc_x86_cpuid_$1=unknown])])
AC_LANG_POP([C])
])

View File

@ -22,6 +22,7 @@ AC_INIT(openbts,P2.8TRUNK)
AC_PREREQ(2.57) AC_PREREQ(2.57)
AC_CONFIG_SRCDIR([Transceiver52M/Makefile.am]) AC_CONFIG_SRCDIR([Transceiver52M/Makefile.am])
AC_CONFIG_AUX_DIR([.]) AC_CONFIG_AUX_DIR([.])
AC_CONFIG_MACRO_DIR([config])
AM_CONFIG_HEADER(config.h) AM_CONFIG_HEADER(config.h)
AC_CANONICAL_BUILD AC_CANONICAL_BUILD
@ -90,11 +91,14 @@ AS_IF([test "x$with_usrp1" = "xyes"], [
if test "x$libusrp_3_3" = "xyes";then if test "x$libusrp_3_3" = "xyes";then
AC_DEFINE(HAVE_LIBUSRP_3_3, 1, Define to 1 if you have libusrp >= 3.3) AC_DEFINE(HAVE_LIBUSRP_3_3, 1, Define to 1 if you have libusrp >= 3.3)
fi fi
# Find and define supported SIMD extensions
AX_EXT
]) ])
AS_IF([test "x$with_uhd" = "xyes"],[ AS_IF([test "x$with_uhd" = "xyes"],[
PKG_CHECK_MODULES(UHD, uhd >= 003.004.000) PKG_CHECK_MODULES(UHD, uhd >= 003.004.000)
AC_DEFINE(USE_UHD, 1, Define to 1 if using UHD) AC_DEFINE(USE_UHD, 1, Define to 1 if using UHD)
AX_EXT
]) ])
AS_IF([test "x$with_extref" = "xyes"], [ AS_IF([test "x$with_extref" = "xyes"], [