Skip to content

Commit 0b7986f

Browse files
committed
Tweak SSE2-accelerated strtoupper() and strtolower() for speed
I learned this trick for doing a faster range check against both an upper and a lower bound by reading a disassembler listing of optimized code produced by GCC. Instead of doing 2 compares to check the upper and the lower bound, add an immediate value which shifts the range you are testing for to the far low or high end of the range of possible values for the type in question; a single compare will then do. For example, with signed bytes, adding (SCHAR_MIN - 'A') maps 'A'..'Z' onto SCHAR_MIN..SCHAR_MIN+25, so one signed "less than SCHAR_MIN + 26" test identifies exactly the uppercase letters. Instead of compare + compare + AND, you just do ADD + compare. From microbenchmarking on my development PC, this makes strtoupper() about 10% faster on long strings (~10,000 bytes).
1 parent 6676f5d commit 0b7986f

File tree

3 files changed

+13
-7
lines changed

3 files changed

+13
-7
lines changed

Zend/zend_operators.c

+5-7
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@ static _locale_t current_locale = NULL;
6060
/* Common code for SSE2 accelerated character case conversion */
6161

6262
#define BLOCKCONV_INIT_RANGE(start, end) \
63-
const __m128i blconv_start_minus_1 = _mm_set1_epi8((start) - 1); \
64-
const __m128i blconv_end_plus_1 = _mm_set1_epi8((end) + 1);
63+
const __m128i blconv_offset = _mm_set1_epi8((signed char)(SCHAR_MIN - start)); \
64+
const __m128i blconv_threshold = _mm_set1_epi8(SCHAR_MIN + (end - start) + 1);
6565

6666
#define BLOCKCONV_STRIDE sizeof(__m128i)
6767

@@ -70,14 +70,12 @@ static _locale_t current_locale = NULL;
7070

7171
#define BLOCKCONV_LOAD(input) \
7272
__m128i blconv_operand = _mm_loadu_si128((__m128i*)(input)); \
73-
__m128i blconv_gt = _mm_cmpgt_epi8(blconv_operand, blconv_start_minus_1); \
74-
__m128i blconv_lt = _mm_cmplt_epi8(blconv_operand, blconv_end_plus_1); \
75-
__m128i blconv_mingle = _mm_and_si128(blconv_gt, blconv_lt);
73+
__m128i blconv_mask = _mm_cmplt_epi8(_mm_add_epi8(blconv_operand, blconv_offset), blconv_threshold);
7674

77-
#define BLOCKCONV_FOUND() _mm_movemask_epi8(blconv_mingle)
75+
#define BLOCKCONV_FOUND() _mm_movemask_epi8(blconv_mask)
7876

7977
#define BLOCKCONV_STORE(dest) \
80-
__m128i blconv_add = _mm_and_si128(blconv_mingle, blconv_delta); \
78+
__m128i blconv_add = _mm_and_si128(blconv_mask, blconv_delta); \
8179
__m128i blconv_result = _mm_add_epi8(blconv_operand, blconv_add); \
8280
_mm_storeu_si128((__m128i *)(dest), blconv_result);
8381

ext/standard/tests/strings/strtolower.phpt

+4
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ $strings = array (
3030
"ZZZZZZZZZZZZZZZZZZZZ",
3131
"@@@@@@@@@@@@@@@@@@@@",
3232
"[[[[[[[[[[[[[[[[[[[[",
33+
"abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
3334
);
3435

3536
$count = 0;
@@ -347,6 +348,9 @@ string(20) "@@@@@@@@@@@@@@@@@@@@"
347348
-- Iteration 11 --
348349
string(20) "[[[[[[[[[[[[[[[[[[[["
349350

351+
-- Iteration 12 --
352+
string(62) "abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz"
353+
350354
*** Testing strtolower() with two different case strings ***
351355
strings are same, with Case Insensitive
352356
*** Done ***

ext/standard/tests/strings/strtoupper1.phpt

+4
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ $strings = array (
2828
"zzzzzzzzzzzzzzzzzzzz",
2929
"````````````````````",
3030
"{{{{{{{{{{{{{{{{{{{{",
31+
"abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
3132
);
3233

3334
$count = 0;
@@ -346,6 +347,9 @@ string(20) "````````````````````"
346347
-- Iteration 11 --
347348
string(20) "{{{{{{{{{{{{{{{{{{{{"
348349

350+
-- Iteration 12 --
351+
string(62) "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
352+
349353
*** Testing strtoupper() with two different case strings ***
350354
strings are same, with Case Insensitive
351355
*** Done ***

0 commit comments

Comments
 (0)