diff --git a/configure.ac b/configure.ac index a8c1d2e35..f7acf05cb 100644 --- a/configure.ac +++ b/configure.ac @@ -281,7 +281,7 @@ then AX_CHECK_SIMD else AM_CONDITIONAL(HAVE_AVX2, false) - AM_CONDITIONAL(HAVE_SSE3, false) + AM_CONDITIONAL(HAVE_SSSE3, false) AM_CONDITIONAL(HAVE_SSE4_1, false) fi diff --git a/m4/ax_check_simd.m4 b/m4/ax_check_simd.m4 index 8a0ceb702..daca2beb9 100644 --- a/m4/ax_check_simd.m4 +++ b/m4/ax_check_simd.m4 @@ -19,7 +19,7 @@ # # And defines: # -# HAVE_AVX3 / HAVE_SSE3 / HAVE_SSE4.1 +# HAVE_AVX2 / HAVE_SSSE3 / HAVE_SSE4_1 # # LICENSE # @@ -42,7 +42,7 @@ AC_DEFUN([AX_CHECK_SIMD], AC_REQUIRE([AC_CANONICAL_HOST]) AM_CONDITIONAL(HAVE_AVX2, false) - AM_CONDITIONAL(HAVE_SSE3, false) + AM_CONDITIONAL(HAVE_SSSE3, false) AM_CONDITIONAL(HAVE_SSE4_1, false) case $host_cpu in @@ -57,14 +57,14 @@ AC_DEFUN([AX_CHECK_SIMD], AC_MSG_WARN([Your compiler does not support AVX2 instructions]) fi - AX_CHECK_COMPILE_FLAG(-msse3, ax_cv_support_sse3_ext=yes, []) - if test x"$ax_cv_support_sse3_ext" = x"yes"; then - SIMD_FLAGS="$SIMD_FLAGS -msse3" - AC_DEFINE(HAVE_SSE3,, - [Support SSE3 (Streaming SIMD Extensions 3) instructions]) - AM_CONDITIONAL(HAVE_SSE3, true) + AX_CHECK_COMPILE_FLAG(-mssse3, ax_cv_support_ssse3_ext=yes, []) + if test x"$ax_cv_support_ssse3_ext" = x"yes"; then + SIMD_FLAGS="$SIMD_FLAGS -mssse3" + AC_DEFINE(HAVE_SSSE3,, + [Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions]) + AM_CONDITIONAL(HAVE_SSSE3, true) else - AC_MSG_WARN([Your compiler does not support SSE3 instructions]) + AC_MSG_WARN([Your compiler does not support SSSE3 instructions]) fi AX_CHECK_COMPILE_FLAG(-msse4.1, ax_cv_support_sse41_ext=yes, []) diff --git a/src/Makefile.am b/src/Makefile.am index e7f94cef2..3d6e6f799 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -23,20 +23,20 @@ libosmocore_la_SOURCES = timer.c timer_gettimeofday.c select.c signal.c msgb.c b macaddr.c stat_item.c stats.c stats_statsd.c prim.c \ conv_acc.c conv_acc_generic.c sercomm.c 
prbs.c -if HAVE_SSE3 +if HAVE_SSSE3 libosmocore_la_SOURCES += conv_acc_sse.c if HAVE_SSE4_1 -conv_acc_sse.lo : AM_CFLAGS += -msse3 -msse4.1 +conv_acc_sse.lo : AM_CFLAGS += -mssse3 -msse4.1 else -conv_acc_sse.lo : AM_CFLAGS += -msse3 +conv_acc_sse.lo : AM_CFLAGS += -mssse3 endif if HAVE_AVX2 libosmocore_la_SOURCES += conv_acc_sse_avx.c if HAVE_SSE4_1 -conv_acc_sse_avx.lo : AM_CFLAGS += -msse3 -mavx2 -msse4.1 +conv_acc_sse_avx.lo : AM_CFLAGS += -mssse3 -mavx2 -msse4.1 else -conv_acc_sse_avx.lo : AM_CFLAGS += -msse3 -mavx2 +conv_acc_sse_avx.lo : AM_CFLAGS += -mssse3 -mavx2 endif endif endif diff --git a/src/conv_acc.c b/src/conv_acc.c index 33fe26491..c16e43643 100644 --- a/src/conv_acc.c +++ b/src/conv_acc.c @@ -48,7 +48,7 @@ static int init_complete = 0; __attribute__ ((visibility("hidden"))) int avx2_supported = 0; -__attribute__ ((visibility("hidden"))) int sse3_supported = 0; +__attribute__ ((visibility("hidden"))) int ssse3_supported = 0; __attribute__ ((visibility("hidden"))) int sse41_supported = 0; /** @@ -75,12 +75,12 @@ void (*osmo_conv_metrics_k7_n4)(const int8_t *seq, int16_t *osmo_conv_gen_vdec_malloc(size_t n); void osmo_conv_gen_vdec_free(int16_t *ptr); -#if defined(HAVE_SSE3) +#if defined(HAVE_SSSE3) int16_t *osmo_conv_sse_vdec_malloc(size_t n); void osmo_conv_sse_vdec_free(int16_t *ptr); #endif -#if defined(HAVE_SSE3) && defined(HAVE_AVX2) +#if defined(HAVE_SSSE3) && defined(HAVE_AVX2) int16_t *osmo_conv_sse_avx_vdec_malloc(size_t n); void osmo_conv_sse_avx_vdec_free(int16_t *ptr); #endif @@ -99,7 +99,7 @@ void osmo_conv_gen_metrics_k7_n3(const int8_t *seq, const int16_t *out, void osmo_conv_gen_metrics_k7_n4(const int8_t *seq, const int16_t *out, int16_t *sums, int16_t *paths, int norm); -#if defined(HAVE_SSE3) +#if defined(HAVE_SSSE3) void osmo_conv_sse_metrics_k5_n2(const int8_t *seq, const int16_t *out, int16_t *sums, int16_t *paths, int norm); void osmo_conv_sse_metrics_k5_n3(const int8_t *seq, const int16_t *out, @@ -114,7 +114,7 @@ void 
osmo_conv_sse_metrics_k7_n4(const int8_t *seq, const int16_t *out, int16_t *sums, int16_t *paths, int norm); #endif -#if defined(HAVE_SSE3) && defined(HAVE_AVX2) +#if defined(HAVE_SSSE3) && defined(HAVE_AVX2) void osmo_conv_sse_avx_metrics_k5_n2(const int8_t *seq, const int16_t *out, int16_t *sums, int16_t *paths, int norm); void osmo_conv_sse_avx_metrics_k5_n3(const int8_t *seq, const int16_t *out, @@ -654,8 +654,8 @@ static void osmo_conv_init(void) avx2_supported = __builtin_cpu_supports("avx2"); #endif - #ifdef HAVE_SSE3 - sse3_supported = __builtin_cpu_supports("sse3"); + #ifdef HAVE_SSSE3 + ssse3_supported = __builtin_cpu_supports("ssse3"); #endif #ifdef HAVE_SSE4_1 @@ -667,16 +667,16 @@ static void osmo_conv_init(void) * Usage of curly braces is mandatory, * because we use multi-line define. */ -#if defined(HAVE_SSE3) && defined(HAVE_AVX2) - if (sse3_supported && avx2_supported) { +#if defined(HAVE_SSSE3) && defined(HAVE_AVX2) + if (ssse3_supported && avx2_supported) { INIT_POINTERS(sse_avx); - } else if (sse3_supported) { + } else if (ssse3_supported) { INIT_POINTERS(sse); } else { INIT_POINTERS(gen); } -#elif defined(HAVE_SSE3) - if (sse3_supported) { +#elif defined(HAVE_SSSE3) + if (ssse3_supported) { INIT_POINTERS(sse); } else { INIT_POINTERS(gen); diff --git a/src/conv_acc_sse.c b/src/conv_acc_sse.c index a9679ef03..63d8722ac 100644 --- a/src/conv_acc_sse.c +++ b/src/conv_acc_sse.c @@ -1,6 +1,6 @@ /*! \file conv_acc_sse.c * Accelerated Viterbi decoder implementation - * for architectures with only SSE3 available. */ + * for architectures with only SSSE3 available. */ /* * Copyright (C) 2013, 2014 Thomas Tsou * diff --git a/src/conv_acc_sse_avx.c b/src/conv_acc_sse_avx.c index 5b6e7040b..5ac3c163c 100644 --- a/src/conv_acc_sse_avx.c +++ b/src/conv_acc_sse_avx.c @@ -1,6 +1,6 @@ /*! \file conv_acc_sse_avx.c * Accelerated Viterbi decoder implementation - * for architectures with both SSE3 and AVX2 support. 
*/ + * for architectures with both SSSE3 and AVX2 support. */ /* * Copyright (C) 2013, 2014 Thomas Tsou *