From 9471d7635abeb9b5c87227d64e452890da0e65bd Mon Sep 17 00:00:00 2001 From: Thomas Tsou Date: Tue, 20 Aug 2013 21:24:24 -0400 Subject: [PATCH] Transceiver52M: Add SSE floating point / integer conversion Convertions are performed in multiples of 4 or 8. All loads are considered unaligned. Signed-off-by: Thomas Tsou --- Transceiver52M/Makefile.am | 6 +- Transceiver52M/convert.c | 199 ++++++++++++++++++++++++ Transceiver52M/convert.h | 7 + Transceiver52M/radioInterface.cpp | 93 +++++------ Transceiver52M/radioInterface.h | 1 - Transceiver52M/radioInterfaceResamp.cpp | 24 +-- 6 files changed, 257 insertions(+), 73 deletions(-) create mode 100644 Transceiver52M/convert.c create mode 100644 Transceiver52M/convert.h diff --git a/Transceiver52M/Makefile.am b/Transceiver52M/Makefile.am index c2f8eced..d002b041 100644 --- a/Transceiver52M/Makefile.am +++ b/Transceiver52M/Makefile.am @@ -53,7 +53,8 @@ COMMON_SOURCES = \ sigProcLib.cpp \ Transceiver.cpp \ DummyLoad.cpp \ - convolve.c + convolve.c \ + convert.c libtransceiver_la_SOURCES = \ $(COMMON_SOURCES) \ @@ -78,7 +79,8 @@ noinst_HEADERS = \ rcvLPF_651.h \ sendLPF_961.h \ Resampler.h \ - convolve.h + convolve.h \ + convert.h USRPping_SOURCES = USRPping.cpp USRPping_LDADD = \ diff --git a/Transceiver52M/convert.c b/Transceiver52M/convert.c new file mode 100644 index 00000000..dc5e748d --- /dev/null +++ b/Transceiver52M/convert.c @@ -0,0 +1,199 @@ +/* + * SSE type conversions + * Copyright (C) 2013 Thomas Tsou + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_SSE3 +#include +#include + +#ifdef HAVE_SSE4_1 +#include + +/* 16*N 16-bit signed integer converted to single precision floats */ +static void _sse_convert_si16_ps_16n(float *restrict out, + short *restrict in, + int len) +{ + __m128i m0, m1, m2, m3, m4, m5; + __m128 m6, m7, m8, m9; + + for (int i = 0; i < len / 16; i++) { + /* Load (unaligned) packed floats */ + m0 = _mm_loadu_si128((__m128i *) &in[16 * i + 0]); + m1 = _mm_loadu_si128((__m128i *) &in[16 * i + 8]); + + /* Unpack */ + m2 = _mm_cvtepi16_epi32(m0); + m4 = _mm_cvtepi16_epi32(m1); + m0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); + m1 = _mm_shuffle_epi32(m1, _MM_SHUFFLE(1, 0, 3, 2)); + m3 = _mm_cvtepi16_epi32(m0); + m5 = _mm_cvtepi16_epi32(m1); + + /* Convert */ + m6 = _mm_cvtepi32_ps(m2); + m7 = _mm_cvtepi32_ps(m3); + m8 = _mm_cvtepi32_ps(m4); + m9 = _mm_cvtepi32_ps(m5); + + /* Store */ + _mm_storeu_ps(&out[16 * i + 0], m6); + _mm_storeu_ps(&out[16 * i + 4], m7); + _mm_storeu_ps(&out[16 * i + 8], m8); + _mm_storeu_ps(&out[16 * i + 12], m9); + } +} + +/* 16*N 16-bit signed integer conversion with remainder */ +static void _sse_convert_si16_ps(float *restrict out, + short *restrict in, + int len) +{ + int start = len / 16 * 16; + + _sse_convert_si16_ps_16n(out, in, len); + + for (int i = 0; i < len % 16; i++) + out[start + i] = in[start + i]; +} +#endif /* HAVE_SSE4_1 */ + +/* 8*N single precision floats scaled and converted to 16-bit signed integer */ +static void _sse_convert_scale_ps_si16_8n(short *restrict out, + float *restrict in, + float scale, int len) +{ + __m128 m0, m1, m2; + __m128i m4, m5; + + for (int i = 0; i < len / 8; i++) { + /* Load (unaligned) packed floats */ + m0 = _mm_loadu_ps(&in[8 * i + 0]); + m1 = _mm_loadu_ps(&in[8 * i + 4]); + m2 = _mm_load1_ps(&scale); + + /* Scale */ + m0 = _mm_mul_ps(m0, m2); + m1 = _mm_mul_ps(m1, m2); + + /* Convert */ + m4 = _mm_cvtps_epi32(m0); + m5 = _mm_cvtps_epi32(m1); + + /* Pack and store */ + m5 = _mm_packs_epi32(m4, m5); + _mm_storeu_si128((__m128i *) &out[8 * i], m5); + } +} + +/* 8*N single precision floats scaled and converted with remainder */ +static void _sse_convert_scale_ps_si16(short *restrict out, + float *restrict in, + float scale, int len) +{ + int start = len / 8 * 8; + + _sse_convert_scale_ps_si16_8n(out, in, scale, len); + + for (int i = 0; i < len % 8; i++) + out[start + i] = in[start + i] * scale; +} + +/* 16*N single precision floats scaled and converted to 16-bit signed integer */ +static void _sse_convert_scale_ps_si16_16n(short *restrict out, + float *restrict in, + float scale, int len) +{ + __m128 m0, m1, m2, m3, m4; + __m128i m5, m6, m7, m8; + + for (int i = 0; i < len / 16; i++) { + /* Load (unaligned) packed floats */ + m0 = _mm_loadu_ps(&in[16 * i + 0]); + m1 = _mm_loadu_ps(&in[16 * i + 4]); + m2 = _mm_loadu_ps(&in[16 * i + 8]); + m3 = _mm_loadu_ps(&in[16 * i + 12]); + m4 = _mm_load1_ps(&scale); + + /* Scale */ + m0 = _mm_mul_ps(m0, m4); + m1 = _mm_mul_ps(m1, m4); + m2 = _mm_mul_ps(m2, m4); + m3 = _mm_mul_ps(m3, m4); + + /* Convert */ + m5 = _mm_cvtps_epi32(m0); + m6 = _mm_cvtps_epi32(m1); + m7 = _mm_cvtps_epi32(m2); + m8 = _mm_cvtps_epi32(m3); + + /* Pack and store */ + m5 = _mm_packs_epi32(m5, m6); + m7 = _mm_packs_epi32(m7, m8); + _mm_storeu_si128((__m128i *) &out[16 * i + 0], m5); + _mm_storeu_si128((__m128i *) &out[16 * i + 8], m7); + } +} +#else /* HAVE_SSE3 */ +static void convert_scale_ps_si16(short *out, float *in, float scale, int len) +{ + for (int i = 0; i < len; i++) + out[i] = in[i] * scale; +} +#endif + +#ifndef HAVE_SSE_4_1 +static void convert_si16_ps(float *out, short *in, int len) +{ + for (int i = 0; i < len; i++) + out[i] = in[i]; +} +#endif + +void convert_float_short(short *out, float *in, float scale, int len) +{ +#ifdef HAVE_SSE3 + if (!(len % 16)) + _sse_convert_scale_ps_si16_16n(out, in, scale, len); + else if (!(len % 8)) + _sse_convert_scale_ps_si16_8n(out, in, scale, len); + else + _sse_convert_scale_ps_si16(out, in, scale, len); +#else + convert_scale_ps_si16(out, in, scale, len); +#endif +} + +void convert_short_float(float *out, short *in, int len) +{ +#ifdef HAVE_SSE4_1 + if (!(len % 16)) + _sse_convert_si16_ps_16n(out, in, len); + else + _sse_convert_si16_ps(out, in, len); +#else + convert_si16_ps(out, in, len); +#endif +} diff --git a/Transceiver52M/convert.h b/Transceiver52M/convert.h new file mode 100644 index 00000000..5b557bfc --- /dev/null +++ b/Transceiver52M/convert.h @@ -0,0 +1,7 @@ +#ifndef _CONVERT_H_ +#define _CONVERT_H_ + +void convert_float_short(short *out, float *in, float scale, int len); +void convert_short_float(float *out, short *in, int len); + +#endif /* _CONVERT_H_ */ diff --git a/Transceiver52M/radioInterface.cpp b/Transceiver52M/radioInterface.cpp index 1e25b5d4..f39a470d 100644 --- a/Transceiver52M/radioInterface.cpp +++ b/Transceiver52M/radioInterface.cpp @@ -25,30 +25,12 @@ #include "radioInterface.h" #include +extern "C" { +#include "convert.h" +} + bool started = false; -/* Device side buffers */ -static short rx_buf[OUTCHUNK * 2 * 2]; -static short tx_buf[INCHUNK * 2 * 2]; - -/* Complex float to short conversion */ -static void floatToShort(short *out, float *in, int num) -{ - for (int i = 0; i < num; i++) { - out[2 * i + 0] = (short) in[2 * i + 0]; - out[2 * i + 1] = (short) in[2 * i + 1]; - } -} - -/* Complex short to float conversion */ -static void shortToFloat(float *out, short *in, int num) -{ - for (int i = 0; i < num; i++) { - out[2 * i + 0] = (float) in[2 * i + 0]; - out[2 * i + 1] = (float) in[2 * i + 1]; - } -} - RadioInterface::RadioInterface(RadioDevice *wRadio, int wReceiveOffset, int wSPS, @@ -96,6 +78,7 @@ void RadioInterface::close() convertSendBuffer = NULL; } + double RadioInterface::fullScaleInputValue(void) { return mRadio->fullScaleInputValue(); } @@ -120,22 +103,14 @@ void RadioInterface::setPowerAttenuation(double atten) int RadioInterface::radioifyVector(signalVector &wVector, float *retVector, - float scale, bool zero) { - int i; - signalVector::iterator itr = wVector.begin(); - if (zero) { memset(retVector, 0, wVector.size() * 2 * sizeof(float)); return wVector.size(); } - for (i = 0; i < wVector.size(); i++) { - retVector[2 * i + 0] = itr->real() * scale; - retVector[2 * i + 1] = itr->imag() * scale; - itr++; - } + memcpy(retVector, wVector.begin(), wVector.size() * 2 * sizeof(float)); return wVector.size(); } @@ -143,10 +118,14 @@ int RadioInterface::radioifyVector(signalVector &wVector, int RadioInterface::unRadioifyVector(float *floatVector, signalVector& newVector) { - int i; signalVector::iterator itr = newVector.begin(); - for (i = 0; i < newVector.size(); i++) { + if (newVector.size() > recvCursor) { + LOG(ALERT) << "Insufficient number of samples in receive buffer"; + return -1; + } + + for (int i = 0; i < newVector.size(); i++) { *itr++ = Complex(floatVector[2 * i + 0], floatVector[2 * i + 1]); } @@ -205,8 +184,7 @@ void RadioInterface::driveTransmitRadio(signalVector &radioBurst, bool zeroBurst return; radioifyVector(radioBurst, - (float *) (sendBuffer->begin() + sendCursor), - powerScaling, zeroBurst); + (float *) (sendBuffer->begin() + sendCursor), zeroBurst); sendCursor += radioBurst.size(); @@ -293,36 +271,49 @@ double RadioInterface::getRxGain() void RadioInterface::pullBuffer() { bool local_underrun; + int num_recv; - /* Read samples. Fail if we don't get what we want. */ - int num_rd = mRadio->readSamples(rx_buf, OUTCHUNK, &overrun, - readTimestamp, &local_underrun); + /* Outer buffer access size is fixed */ + num_recv = mRadio->readSamples(convertRecvBuffer, + OUTCHUNK, + &overrun, + readTimestamp, + &local_underrun); + if (num_recv != OUTCHUNK) { + LOG(ALERT) << "Receive error " << num_recv; + return; + } - LOG(DEBUG) << "Rx read " << num_rd << " samples from device"; - assert(num_rd == OUTCHUNK); + convert_short_float((float *) (recvBuffer->begin() + recvCursor), + convertRecvBuffer, 2 * OUTCHUNK); underrun |= local_underrun; - readTimestamp += (TIMESTAMP) num_rd; + readTimestamp += num_recv; - shortToFloat((float *) recvBuffer->begin() + recvCursor, rx_buf, num_rd); - recvCursor += num_rd; + recvCursor += num_recv; } /* Send timestamped chunk to the device with arbitrary size */ void RadioInterface::pushBuffer() { + int num_sent; + if (sendCursor < INCHUNK) return; - floatToShort(tx_buf, (float *) sendBuffer->begin(), sendCursor); + convert_float_short(convertSendBuffer, + (float *) sendBuffer->begin(), + powerScaling, 2 * sendCursor); - /* Write samples. Fail if we don't get what we want. */ - int num_smpls = mRadio->writeSamples(tx_buf, - sendCursor, - &underrun, - writeTimestamp); - assert(num_smpls == sendCursor); + /* Send the all samples in the send buffer */ + num_sent = mRadio->writeSamples(convertSendBuffer, + sendCursor, + &underrun, + writeTimestamp); + if (num_sent != sendCursor) { + LOG(ALERT) << "Transmit error " << num_sent; + } - writeTimestamp += (TIMESTAMP) num_smpls; + writeTimestamp += num_sent; sendCursor = 0; } diff --git a/Transceiver52M/radioInterface.h b/Transceiver52M/radioInterface.h index 690d5728..a590c326 100644 --- a/Transceiver52M/radioInterface.h +++ b/Transceiver52M/radioInterface.h @@ -70,7 +70,6 @@ private: /** format samples to USRP */ int radioifyVector(signalVector &wVector, float *floatVector, - float scale, bool zero); /** format samples from USRP */ diff --git a/Transceiver52M/radioInterfaceResamp.cpp b/Transceiver52M/radioInterfaceResamp.cpp index d3dc82ed..2b59203c 100644 --- a/Transceiver52M/radioInterfaceResamp.cpp +++ b/Transceiver52M/radioInterfaceResamp.cpp @@ -49,20 +49,6 @@ static Resampler *dnsampler = NULL; short *convertRecvBuffer = NULL; short *convertSendBuffer = NULL; -/* Complex float to short conversion */ -static void floatToShort(short *out, float *in, int num) -{ - for (int i = 0; i < num; i++) - out[i] = (short) in[i]; -} - -/* Complex short to float conversion */ -static void shortToFloat(float *out, short *in, int num) -{ - for (int i = 0; i < num; i++) - out[i] = (float) in[i]; -} - RadioInterfaceResamp::RadioInterfaceResamp(RadioDevice *wRadio, int wReceiveOffset, int wSPS, @@ -166,8 +152,8 @@ void RadioInterfaceResamp::pullBuffer() return; } - shortToFloat((float *) outerRecvBuffer->begin(), - convertRecvBuffer, 2 * outer_len); + convert_short_float((float *) outerRecvBuffer->begin(), + convertRecvBuffer, 2 * outer_len); underrun |= local_underrun; readTimestamp += (TIMESTAMP) num_recv; @@ -206,9 +192,9 @@ void RadioInterfaceResamp::pushBuffer() LOG(ALERT) << "Sample rate downsampling error"; } - floatToShort(convertSendBuffer, - (float *) outerSendBuffer->begin(), - 2 * outer_len); + convert_float_short(convertSendBuffer, + (float *) outerSendBuffer->begin(), + powerScaling, 2 * outer_len); num_sent = mRadio->writeSamples(convertSendBuffer, outer_len,