diff --git a/CMakeLists.txt b/CMakeLists.txt index 41845d7..823fddd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -43,6 +43,15 @@ include(GrVersion) #setup version info ######################################################################## # Compiler specific setup ######################################################################## + + SET(USE_SIMD "no" CACHE STRING "Use SIMD instructions") + SET(USE_SIMD_VALUES "no" "SSE2" "AVX") + SET_PROPERTY(CACHE USE_SIMD PROPERTY STRINGS ${USE_SIMD_VALUES}) + LIST(FIND USE_SIMD_VALUES ${USE_SIMD} USE_SIMD_INDEX) + IF(${USE_SIMD_INDEX} EQUAL -1) + message(FATAL_ERROR "Option ${USE_SIMD} not supported, valid entries are ${USE_SIMD_VALUES}") + ENDIF() + IF(CMAKE_COMPILER_IS_GNUCXX) ADD_DEFINITIONS(-Wall) ADD_DEFINITIONS(-Wextra) @@ -56,6 +65,23 @@ IF(CMAKE_COMPILER_IS_GNUCXX) ADD_DEFINITIONS(-fvisibility=hidden) ADD_DEFINITIONS(-fvisibility-inlines-hidden) ENDIF(NOT WIN32) + IF(USE_SIMD MATCHES SSE2) + ADD_DEFINITIONS(-msse2) + ADD_DEFINITIONS(-DUSE_SSE2) + ENDIF() + IF(USE_SIMD MATCHES AVX) + ADD_DEFINITIONS(-march=native) + ADD_DEFINITIONS(-DUSE_AVX) + ENDIF() +ELSEIF(MSVC) + IF(USE_SIMD MATCHES SSE2) + ADD_DEFINITIONS(/arch:SSE2) + ADD_DEFINITIONS(-DUSE_SSE2) + ENDIF() + IF(USE_SIMD MATCHES AVX) + ADD_DEFINITIONS(/arch:AVX) + ADD_DEFINITIONS(-DUSE_AVX) + ENDIF() ENDIF(CMAKE_COMPILER_IS_GNUCXX) ######################################################################## diff --git a/lib/hackrf/hackrf_sink_c.cc b/lib/hackrf/hackrf_sink_c.cc index 59a4fc7..c76f70f 100644 --- a/lib/hackrf/hackrf_sink_c.cc +++ b/lib/hackrf/hackrf_sink_c.cc @@ -29,6 +29,12 @@ #include #include +#include +#ifdef USE_AVX +#include +#elif USE_SSE2 +#include +#endif #include #include @@ -126,10 +132,10 @@ hackrf_sink_c_sptr make_hackrf_sink_c (const std::string & args) * are connected to this block. In this case, we accept * only 0 input and 1 output. */ -static const int MIN_IN = 1; // mininum number of input streams -static const int MAX_IN = 1; // maximum number of input streams -static const int MIN_OUT = 0; // minimum number of output streams -static const int MAX_OUT = 0; // maximum number of output streams +static const int MIN_IN = 1; // mininum number of input streams +static const int MAX_IN = 1; // maximum number of input streams +static const int MIN_OUT = 0; // minimum number of output streams +static const int MAX_OUT = 0; // maximum number of output streams /* * The private constructor @@ -320,9 +326,74 @@ bool hackrf_sink_c::stop() return ! (bool) hackrf_is_streaming( _dev ); } + +#ifdef USE_AVX +void convert_avx(const float* inbuf, unsigned char* outbuf,const unsigned int count) +{ + __m256 mulme = _mm256_set_ps(127.0f, 127.0f, 127.0f, 127.0f, 127.0f, 127.0f, 127.0f, 127.0f); + __m128i addme = _mm_set_epi16(127, 127, 127, 127, 127, 127, 127, 127); + + for(unsigned int i=0; i BUF_LEN ) { - { - boost::mutex::scoped_lock lock( _buf_mutex ); + unsigned int remaining = (BUF_LEN-_buf_used)/2; //complex - if ( ! cb_push_back( &_cbuf, _buf ) ) { - _buf_used = prev_buf_used; - items_consumed = 0; - std::cerr << "O" << std::flush; - break; - } else { -// std::cerr << "." << std::flush; - } + unsigned int count = std::min((unsigned int)noutput_items,remaining); + unsigned int sse_rem = count/8; // 8 complex = 16f==512bit for avx + unsigned int nosse_rem = count%8; // remainder + +#ifdef USE_AVX + convert_avx((float*)in, buf, sse_rem); + convert_default((float*)(in+sse_rem*8), buf+(sse_rem*8*2), nosse_rem*2); +#elif USE_SSE2 + convert_sse2((float*)in, buf, sse_rem); + convert_default((float*)(in+sse_rem*8), buf+(sse_rem*8*2), nosse_rem*2); +#else + convert_default((float*)in, buf, count*2); +#endif + + _buf_used += (sse_rem*8+nosse_rem)*2; + int items_consumed = sse_rem*8+nosse_rem; + + if(noutput_items >= remaining) { + { + boost::mutex::scoped_lock lock( _buf_mutex ); + + if ( ! cb_push_back( &_cbuf, _buf ) ) { + _buf_used = prev_buf_used; + items_consumed = 0; + std::cerr << "O" << std::flush; + } else { + // std::cerr << "." << std::flush; + _buf_used = 0; } - - _buf_used = 0; - break; } - - *buf++ = (in[i].real() + 1.0) * 127; - *buf++ = (in[i].imag() + 1.0) * 127; - - _buf_used += BYTES_PER_SAMPLE; - items_consumed++; } noutput_items = items_consumed;