Transceiver52M: Add NEON complex-complex multiply

Complex-complex block multiplies are used for phase rotation of
bursts. This optimization was targeted based on perf profiling.

Signed-off-by: Thomas Tsou <tom@tsou.cc>
This commit is contained in:
Thomas Tsou 2013-11-09 02:29:55 -05:00
parent acc22fa3ff
commit 0a3dc4c210
6 changed files with 131 additions and 2 deletions

View File

@ -77,7 +77,8 @@ noinst_HEADERS = \
Resampler.h \
common/convolve.h \
common/convert.h \
common/scale.h
common/scale.h \
common/mult.h
transceiver_SOURCES = runTransceiver.cpp
transceiver_LDADD = \

View File

@ -17,5 +17,7 @@ libarch_la_SOURCES = \
convolve.c \
convolve_neon.S \
scale.c \
scale_neon.S
scale_neon.S \
mult.c \
mult_neon.S
endif

56
Transceiver52M/arm/mult.c Normal file
View File

@ -0,0 +1,56 @@
/*
* NEON complex multiplication
* Copyright (C) 2012,2013 Thomas Tsou <tom@tsou.cc>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <malloc.h>
#include <string.h>
#include <mult.h>
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
/*
 * NEON kernel implemented in assembly. It multiplies two vectors of
 * interleaved complex floats, handling four complex samples per loop pass.
 * Arguments, in order: output buffer, first input, second input, and the
 * number of four-sample groups to process (mul_complex() passes len >> 2).
 */
void neon_cmplx_mul_4n(float *, float *, float *, int);
/*
 * Portable complex-complex multiply: out[k] = a[k] * b[k] for k in [0, len).
 *
 * Every buffer holds interleaved (real, imaginary) float pairs, i.e.
 * 2 * len floats. Both operands of a sample are fetched before its product
 * is stored, so the result for index k depends only on the inputs at k.
 */
static void cmplx_mul_ps(float *out, float *a, float *b, int len)
{
	const float *lhs = a;
	const float *rhs = b;
	float *dst = out;

	for (int left = len; left > 0; left--) {
		/* Read the whole sample pair first; dst may be one of the inputs. */
		const float lhs_re = lhs[0];
		const float lhs_im = lhs[1];
		const float rhs_re = rhs[0];
		const float rhs_im = rhs[1];

		dst[0] = lhs_re * rhs_re - lhs_im * rhs_im;
		dst[1] = lhs_re * rhs_im + lhs_im * rhs_re;

		lhs += 2;
		rhs += 2;
		dst += 2;
	}
}
/*
 * Multiply two complex float vectors element-wise: out[k] = a[k] * b[k].
 *
 * out, a, b: buffers of interleaved (real, imaginary) float pairs, each
 *            holding len complex samples (2 * len floats).
 * len:       number of complex samples; values <= 0 leave out untouched.
 *
 * With HAVE_NEON the assembly kernel is used when len is a multiple of
 * four; any other length falls back to the portable C loop.
 */
void mul_complex(float *out, float *a, float *b, int len)
{
	/*
	 * Nothing to do for an empty vector. This has to be decided before the
	 * NEON path is chosen: 0 % 4 == 0 would select neon_cmplx_mul_4n() with
	 * a zero pass count, and that kernel's loop decrements and tests its
	 * counter only after each pass, so a zero count must never reach it.
	 */
	if (len <= 0)
		return;

#ifdef HAVE_NEON
	if (len % 4) {
		cmplx_mul_ps(out, a, b, len);
	} else {
		neon_cmplx_mul_4n(out, a, b, len >> 2);
	}
#else
	cmplx_mul_ps(out, a, b, len);
#endif
}

View File

@ -0,0 +1,42 @@
/*
* NEON complex multiplication
* Copyright (C) 2012,2013 Thomas Tsou <tom@tsou.cc>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
 * neon_cmplx_mul_4n(out, a, b, n)
 *
 * Element-wise complex multiply of interleaved float vectors, four complex
 * samples (eight floats) per loop pass.
 *
 *   r0 - output pointer, post-incremented by each store
 *   r1 - first input pointer, post-incremented by each load
 *   r2 - second input pointer, post-incremented by each load
 *   r3 - number of loop passes (groups of four samples)
 *
 * The loop is bottom-tested, so a count that is not positive is rejected
 * before entering it; otherwise r3 would wrap around and the loop would run
 * far beyond the buffers.
 */
	.syntax unified
	.text
	.align 2
	.global neon_cmplx_mul_4n
	.type neon_cmplx_mul_4n, %function
neon_cmplx_mul_4n:
	cmp		r3, #0			/* nothing to do for n <= 0 */
	ble		.done_mul
	vpush		{q4-q7}			/* q4-q7 are used as scratch; save them */
.loop_mul:
	vld2.32		{q0-q1}, [r1]!		/* q0 = a.re[0..3], q1 = a.im[0..3] */
	vld2.32		{q2-q3}, [r2]!		/* q2 = b.re[0..3], q3 = b.im[0..3] */
	vmul.f32	q4, q0, q2		/* a.re * b.re */
	vmul.f32	q5, q1, q3		/* a.im * b.im */
	vmul.f32	q6, q0, q3		/* a.re * b.im */
	vmul.f32	q7, q2, q1		/* b.re * a.im */
	vsub.f32	q8, q4, q5		/* real part of the product */
	vadd.f32	q9, q6, q7		/* imaginary part of the product */
	vst2.32		{q8-q9}, [r0]!		/* store re-interleaved (re, im) pairs */
	subs		r3, #1
	bne		.loop_mul
	vpop		{q4-q7}
.done_mul:
	bx		lr
	.size neon_cmplx_mul_4n, .-neon_cmplx_mul_4n
	.section .note.GNU-stack,"",%progbits

View File

@ -0,0 +1,6 @@
#ifndef _MULT_H_
#define _MULT_H_
/*
 * Element-wise complex multiply of two vectors of interleaved
 * (real, imaginary) float pairs: out[k] = a[k] * b[k].
 *
 * out - destination buffer
 * a   - first operand
 * b   - second operand
 * len - number of complex samples in each buffer (2 * len floats)
 */
void mul_complex(float *out, float *a, float *b, int len);
#endif /* _MULT_H_ */

View File

@ -32,6 +32,7 @@
extern "C" {
#include "convolve.h"
#include "scale.h"
#include "mult.h"
}
using namespace GSM;
@ -287,6 +288,26 @@ void initGMSKRotationTables(int sps)
static void GMSKRotate(signalVector &x, int sps)
{
#if HAVE_NEON
size_t len;
signalVector *a, *b, *out;
a = &x;
out = &x;
len = out->size();
if (len == 157)
len--;
if (sps == 1)
b = GMSKRotation1;
else
b = GMSKRotationN;
mul_complex((float *) out->begin(),
(float *) a->begin(),
(float *) b->begin(), len);
#else
signalVector::iterator rotPtr, xPtr = x.begin();
if (sps == 1)
@ -306,6 +327,7 @@ static void GMSKRotate(signalVector &x, int sps)
xPtr++;
}
}
#endif
}
static void GMSKReverseRotate(signalVector &x, int sps)