2014-05-22 21:04:40 +00:00
/* strcspn with SSE4.2 intrinsics
Copyright ( C ) 2009 - 2014 Free Software Foundation , Inc .
Contributed by Intel Corporation .
This file is part of the GNU C Library .
2018-03-09 08:50:04 +00:00
SPDX - License - Identifier : LGPL - 2.1 - or - later
*/
2014-05-22 21:04:40 +00:00
2014-06-09 16:33:41 +00:00
2014-05-22 21:04:40 +00:00
# include "config.h"
2014-06-10 12:47:09 +00:00
# ifdef HAVE_SSE4_2
2014-06-09 19:28:32 +00:00
2014-05-22 21:04:40 +00:00
# include <glib.h>
2015-02-06 18:52:37 +00:00
# include "ws_cpuid.h"
2014-06-09 16:33:41 +00:00
2015-09-02 21:35:37 +00:00
# ifdef _WIN32
2014-05-22 21:04:40 +00:00
# include <tmmintrin.h>
# endif
# include <nmmintrin.h>
# include <string.h>
2014-06-09 15:30:08 +00:00
# include "ws_mempbrk.h"
2015-02-21 20:13:54 +00:00
# include "ws_mempbrk_int.h"
2014-06-09 15:30:08 +00:00
2014-07-02 15:00:15 +00:00
/* __has_feature(address_sanitizer) is used later for Clang, this is for
* compatibility with other compilers ( such as GCC and MSVC ) */
# ifndef __has_feature
# define __has_feature(x) 0
# endif
2014-07-06 01:29:48 +00:00
# define cast_128aligned__m128i(p) ((const __m128i *) (const void *) (p))
2014-05-22 21:04:40 +00:00
/* Helper for variable shifts of SSE registers.
Copyright ( C ) 2010 Free Software Foundation , Inc .
*/
2014-11-04 19:58:48 +00:00
/* Shuffle-control table used by __m128i_shift_right().  The 16 bytes that
   start at index N form a control vector selecting source bytes N..15,
   followed by N entries of -1.  An entry with its high bit set makes
   _mm_shuffle_epi8() store zero in the corresponding destination byte. */
static const gint8 ___m128i_shift_right[31] =
{
  /* Identity byte indices. */
   0,  1,  2,  3,  4,  5,  6,  7,
   8,  9, 10, 11, 12, 13, 14, 15,
  /* Zero-fill markers (15 of them, enough for a shift of up to 15). */
  -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1
};
static inline __m128i
__m128i_shift_right ( __m128i value , unsigned long int offset )
{
/* _mm_loadu_si128() works with unaligned data, cast safe */
return _mm_shuffle_epi8 ( value ,
2014-07-06 01:29:48 +00:00
_mm_loadu_si128 ( cast_128aligned__m128i ( ___m128i_shift_right + offset ) ) ) ;
2014-05-22 21:04:40 +00:00
}
2015-02-06 18:52:37 +00:00
void
2015-02-21 20:39:00 +00:00
ws_mempbrk_sse42_compile ( ws_mempbrk_pattern * pattern , const gchar * needles )
2015-02-06 18:52:37 +00:00
{
size_t length = strlen ( needles ) ;
pattern - > use_sse42 = ws_cpuid_sse42 ( ) & & ( length < = 16 ) ;
if ( pattern - > use_sse42 ) {
2015-02-12 08:07:02 +00:00
pattern - > mask = _mm_setzero_si128 ( ) ;
memcpy ( & ( pattern - > mask ) , needles , length ) ;
2015-02-06 18:52:37 +00:00
}
}
2014-05-22 21:04:40 +00:00
/* We use 0x2:
2014-06-09 17:28:02 +00:00
_SIDD_SBYTE_OPS
| _SIDD_CMP_EQUAL_ANY
| _SIDD_POSITIVE_POLARITY
| _SIDD_LEAST_SIGNIFICANT
2014-05-22 21:04:40 +00:00
on pcmpistri to compare xmm / mem128
0 1 2 3 4 5 6 7 8 9 A B C D E F
X X X X X X X X X X X X X X X X
against xmm
0 1 2 3 4 5 6 7 8 9 A B C D E F
A A A A A A A A A A A A A A A A
to find out if the first 16 byte data element has any byte A and
the offset of the first byte . There are 3 cases :
1. The first 16 byte data element has the byte A at the offset X .
2. The first 16 byte data element has EOS and doesn ' t have the byte A .
3. The first 16 byte data element is valid and doesn ' t have the byte A .
Here is the table of ECX, CFlag, ZFlag and SFlag for the 3 cases:
2014-06-09 17:28:02 +00:00
1 X 1 0 / 1 0
2 16 0 1 0
3 16 0 0 0
2014-05-22 21:04:40 +00:00
We exit from the loop for cases 1 and 2 with jbe which branches
when either CFlag or ZFlag is 1. If CFlag = = 1 , ECX has the offset
X for case 1. */
const char *
2019-01-04 08:48:20 +00:00
ws_mempbrk_sse42_exec ( const char * haystack , size_t haystacklen , const ws_mempbrk_pattern * pattern , guchar * found_needle )
2014-05-22 21:04:40 +00:00
{
const char * aligned ;
int offset ;
2019-01-04 08:48:20 +00:00
offset = ( int ) ( ( size_t ) haystack & 15 ) ;
aligned = ( const char * ) ( ( size_t ) haystack & - 16L ) ;
2014-05-22 21:04:40 +00:00
if ( offset ! = 0 )
{
/* Check partial string. cast safe it's 16B aligned */
2014-07-06 01:29:48 +00:00
__m128i value = __m128i_shift_right ( _mm_load_si128 ( cast_128aligned__m128i ( aligned ) ) , offset ) ;
2014-05-22 21:04:40 +00:00
2015-02-12 08:07:02 +00:00
int length = _mm_cmpistri ( pattern - > mask , value , 0x2 ) ;
2014-05-22 21:04:40 +00:00
/* No need to check ZFlag since ZFlag is always 1. */
2015-02-12 08:07:02 +00:00
int cflag = _mm_cmpistrc ( pattern - > mask , value , 0x2 ) ;
2015-02-06 18:52:37 +00:00
/* XXX: why does this compare value with value? */
2014-06-09 13:32:59 +00:00
int idx = _mm_cmpistri ( value , value , 0x3a ) ;
2014-05-22 21:04:40 +00:00
2015-02-06 18:52:37 +00:00
if ( cflag ) {
if ( found_needle )
2019-01-04 08:48:20 +00:00
* found_needle = * ( haystack + length ) ;
return haystack + length ;
2015-02-06 18:52:37 +00:00
}
2014-05-22 21:04:40 +00:00
/* Find where the NULL terminator is. */
2014-06-09 13:32:59 +00:00
if ( idx < 16 - offset )
2014-05-22 21:04:40 +00:00
{
2015-02-06 18:52:37 +00:00
/* found NUL @ 'idx', need to switch to slower mempbrk */
2020-10-10 23:42:05 +00:00
return ws_mempbrk_portable_exec ( haystack + idx + 1 , haystacklen - idx - 1 , pattern , found_needle ) ; /* haystacklen is bigger than 16 & idx < 16 so no underflow here */
2014-05-22 21:04:40 +00:00
}
aligned + = 16 ;
2019-01-04 08:48:20 +00:00
haystacklen - = ( 16 - offset ) ;
2014-05-22 21:04:40 +00:00
}
else
2019-01-04 08:48:20 +00:00
aligned = haystack ;
2014-05-22 21:04:40 +00:00
2019-01-04 08:48:20 +00:00
while ( haystacklen > = 16 )
2014-05-22 21:04:40 +00:00
{
2014-07-06 01:29:48 +00:00
__m128i value = _mm_load_si128 ( cast_128aligned__m128i ( aligned ) ) ;
2015-02-12 08:07:02 +00:00
int idx = _mm_cmpistri ( pattern - > mask , value , 0x2 ) ;
int cflag = _mm_cmpistrc ( pattern - > mask , value , 0x2 ) ;
int zflag = _mm_cmpistrz ( pattern - > mask , value , 0x2 ) ;
2014-05-22 21:04:40 +00:00
2015-02-06 18:52:37 +00:00
if ( cflag ) {
if ( found_needle )
* found_needle = * ( aligned + idx ) ;
2014-06-09 17:28:02 +00:00
return aligned + idx ;
2015-02-06 18:52:37 +00:00
}
2014-05-22 21:04:40 +00:00
if ( zflag )
{
/* found NUL, need to switch to slower mempbrk */
2019-01-04 08:48:20 +00:00
return ws_mempbrk_portable_exec ( aligned , haystacklen , pattern , found_needle ) ;
2014-05-22 21:04:40 +00:00
}
aligned + = 16 ;
2019-01-04 08:48:20 +00:00
haystacklen - = 16 ;
2014-05-22 21:04:40 +00:00
}
/* XXX, use mempbrk_slow here? */
2019-01-04 08:48:20 +00:00
return ws_mempbrk_portable_exec ( aligned , haystacklen , pattern , found_needle ) ;
2014-05-22 21:04:40 +00:00
}
2014-06-09 15:30:08 +00:00
2014-06-10 12:47:09 +00:00
# endif /* HAVE_SSE4_2 */
2014-06-09 17:28:02 +00:00
/*
* Editor modelines
*
* Local Variables :
* c - basic - offset : 2
* tab - width : 8
* indent - tabs - mode : nil
* End :
*
* ex : set shiftwidth = 2 tabstop = 8 expandtab :
* : indentSize = 2 : tabSize = 8 : noTabs = true :
*/