conv_acc: Our code requires SSSE3, not just SSE3

The accelerated convolutional decoder uses SSSE3 instructions such
as PSIGNW (via _mm_sign_epi16), which go beyond what SSE3 offers.  So
let's make sure we use the right compiler flag (-mssse3) and also the
right runtime check (__builtin_cpu_supports("ssse3")).

Without this patch, we would execute illegal instructions on CPUs
that lack SSSE3, e.g. on Opteron Gen3 such as Opteron 2427.  Those
CPUs are also used as build.opensuse.org build hosts (build31 through
build36), where we wouldn't pass "make check" as a result.

Change-Id: I2754164384109f2821fd98ffb48f625893f2923d
Fixes: OS#2386
This commit is contained in:
Harald Welte 2017-11-17 11:41:34 +01:00
parent d068210896
commit b93f60f7cd
6 changed files with 29 additions and 29 deletions

View File

@ -281,7 +281,7 @@ then
AX_CHECK_SIMD
else
AM_CONDITIONAL(HAVE_AVX2, false)
AM_CONDITIONAL(HAVE_SSE3, false)
AM_CONDITIONAL(HAVE_SSSE3, false)
AM_CONDITIONAL(HAVE_SSE4_1, false)
fi

View File

@ -19,7 +19,7 @@
#
# And defines:
#
# HAVE_AVX3 / HAVE_SSE3 / HAVE_SSE4.1
# HAVE_AVX3 / HAVE_SSSE3 / HAVE_SSE4.1
#
# LICENSE
#
@ -42,7 +42,7 @@ AC_DEFUN([AX_CHECK_SIMD],
AC_REQUIRE([AC_CANONICAL_HOST])
AM_CONDITIONAL(HAVE_AVX2, false)
AM_CONDITIONAL(HAVE_SSE3, false)
AM_CONDITIONAL(HAVE_SSSE3, false)
AM_CONDITIONAL(HAVE_SSE4_1, false)
case $host_cpu in
@ -57,14 +57,14 @@ AC_DEFUN([AX_CHECK_SIMD],
AC_MSG_WARN([Your compiler does not support AVX2 instructions])
fi
AX_CHECK_COMPILE_FLAG(-msse3, ax_cv_support_sse3_ext=yes, [])
if test x"$ax_cv_support_sse3_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -msse3"
AC_DEFINE(HAVE_SSE3,,
[Support SSE3 (Streaming SIMD Extensions 3) instructions])
AM_CONDITIONAL(HAVE_SSE3, true)
AX_CHECK_COMPILE_FLAG(-mssse3, ax_cv_support_ssse3_ext=yes, [])
if test x"$ax_cv_support_ssse3_ext" = x"yes"; then
SIMD_FLAGS="$SIMD_FLAGS -mssse3"
AC_DEFINE(HAVE_SSSE3,,
[Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions])
AM_CONDITIONAL(HAVE_SSSE3, true)
else
AC_MSG_WARN([Your compiler does not support SSE3 instructions])
AC_MSG_WARN([Your compiler does not support SSSE3 instructions])
fi
AX_CHECK_COMPILE_FLAG(-msse4.1, ax_cv_support_sse41_ext=yes, [])

View File

@ -23,20 +23,20 @@ libosmocore_la_SOURCES = timer.c timer_gettimeofday.c select.c signal.c msgb.c b
macaddr.c stat_item.c stats.c stats_statsd.c prim.c \
conv_acc.c conv_acc_generic.c sercomm.c prbs.c
if HAVE_SSE3
if HAVE_SSSE3
libosmocore_la_SOURCES += conv_acc_sse.c
if HAVE_SSE4_1
conv_acc_sse.lo : AM_CFLAGS += -msse3 -msse4.1
conv_acc_sse.lo : AM_CFLAGS += -mssse3 -msse4.1
else
conv_acc_sse.lo : AM_CFLAGS += -msse3
conv_acc_sse.lo : AM_CFLAGS += -mssse3
endif
if HAVE_AVX2
libosmocore_la_SOURCES += conv_acc_sse_avx.c
if HAVE_SSE4_1
conv_acc_sse_avx.lo : AM_CFLAGS += -msse3 -mavx2 -msse4.1
conv_acc_sse_avx.lo : AM_CFLAGS += -mssse3 -mavx2 -msse4.1
else
conv_acc_sse_avx.lo : AM_CFLAGS += -msse3 -mavx2
conv_acc_sse_avx.lo : AM_CFLAGS += -mssse3 -mavx2
endif
endif
endif

View File

@ -48,7 +48,7 @@
static int init_complete = 0;
__attribute__ ((visibility("hidden"))) int avx2_supported = 0;
__attribute__ ((visibility("hidden"))) int sse3_supported = 0;
__attribute__ ((visibility("hidden"))) int ssse3_supported = 0;
__attribute__ ((visibility("hidden"))) int sse41_supported = 0;
/**
@ -75,12 +75,12 @@ void (*osmo_conv_metrics_k7_n4)(const int8_t *seq,
int16_t *osmo_conv_gen_vdec_malloc(size_t n);
void osmo_conv_gen_vdec_free(int16_t *ptr);
#if defined(HAVE_SSE3)
#if defined(HAVE_SSSE3)
int16_t *osmo_conv_sse_vdec_malloc(size_t n);
void osmo_conv_sse_vdec_free(int16_t *ptr);
#endif
#if defined(HAVE_SSE3) && defined(HAVE_AVX2)
#if defined(HAVE_SSSE3) && defined(HAVE_AVX2)
int16_t *osmo_conv_sse_avx_vdec_malloc(size_t n);
void osmo_conv_sse_avx_vdec_free(int16_t *ptr);
#endif
@ -99,7 +99,7 @@ void osmo_conv_gen_metrics_k7_n3(const int8_t *seq, const int16_t *out,
void osmo_conv_gen_metrics_k7_n4(const int8_t *seq, const int16_t *out,
int16_t *sums, int16_t *paths, int norm);
#if defined(HAVE_SSE3)
#if defined(HAVE_SSSE3)
void osmo_conv_sse_metrics_k5_n2(const int8_t *seq, const int16_t *out,
int16_t *sums, int16_t *paths, int norm);
void osmo_conv_sse_metrics_k5_n3(const int8_t *seq, const int16_t *out,
@ -114,7 +114,7 @@ void osmo_conv_sse_metrics_k7_n4(const int8_t *seq, const int16_t *out,
int16_t *sums, int16_t *paths, int norm);
#endif
#if defined(HAVE_SSE3) && defined(HAVE_AVX2)
#if defined(HAVE_SSSE3) && defined(HAVE_AVX2)
void osmo_conv_sse_avx_metrics_k5_n2(const int8_t *seq, const int16_t *out,
int16_t *sums, int16_t *paths, int norm);
void osmo_conv_sse_avx_metrics_k5_n3(const int8_t *seq, const int16_t *out,
@ -654,8 +654,8 @@ static void osmo_conv_init(void)
avx2_supported = __builtin_cpu_supports("avx2");
#endif
#ifdef HAVE_SSE3
sse3_supported = __builtin_cpu_supports("sse3");
#ifdef HAVE_SSSE3
ssse3_supported = __builtin_cpu_supports("ssse3");
#endif
#ifdef HAVE_SSE4_1
@ -667,16 +667,16 @@ static void osmo_conv_init(void)
* Usage of curly braces is mandatory,
* because we use multi-line define.
*/
#if defined(HAVE_SSE3) && defined(HAVE_AVX2)
if (sse3_supported && avx2_supported) {
#if defined(HAVE_SSSE3) && defined(HAVE_AVX2)
if (ssse3_supported && avx2_supported) {
INIT_POINTERS(sse_avx);
} else if (sse3_supported) {
} else if (ssse3_supported) {
INIT_POINTERS(sse);
} else {
INIT_POINTERS(gen);
}
#elif defined(HAVE_SSE3)
if (sse3_supported) {
#elif defined(HAVE_SSSE3)
if (ssse3_supported) {
INIT_POINTERS(sse);
} else {
INIT_POINTERS(gen);

View File

@ -1,6 +1,6 @@
/*! \file conv_acc_sse.c
* Accelerated Viterbi decoder implementation
* for architectures with only SSE3 available. */
* for architectures with only SSSE3 available. */
/*
* Copyright (C) 2013, 2014 Thomas Tsou <tom@tsou.cc>
*

View File

@ -1,6 +1,6 @@
/*! \file conv_acc_sse_avx.c
* Accelerated Viterbi decoder implementation
* for architectures with both SSE3 and AVX2 support. */
* for architectures with both SSSE3 and AVX2 support. */
/*
* Copyright (C) 2013, 2014 Thomas Tsou <tom@tsou.cc>
*