Skip to content

Commit c02af98

Browse files
committed Feb 3, 2023
Use AVX2 to accelerate strto{upper,lower} (only on 'AVX2-native' builds for now)
On short strings, there is no difference in performance. However, for strings around 10,000 bytes long, the AVX2-accelerated function is about 55% faster than the SSE2-accelerated one.
1 parent ab87283 commit c02af98

File tree

2 files changed

+44
-1
lines changed

2 files changed

+44
-1
lines changed
 

‎Zend/zend_operators.c

+27-1
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,9 @@
3535
# include <langinfo.h>
3636
#endif
3737

38+
#ifdef ZEND_INTRIN_AVX2_NATIVE
39+
#include <immintrin.h>
40+
#endif
3841
#ifdef __SSE2__
3942
#include <emmintrin.h>
4043
#endif
@@ -54,7 +57,30 @@ static _locale_t current_locale = NULL;
5457

5558
#define TYPE_PAIR(t1,t2) (((t1) << 4) | (t2))
5659

57-
#if __SSE2__
60+
#ifdef ZEND_INTRIN_AVX2_NATIVE
61+
#define HAVE_BLOCKCONV
62+
63+
#define BLOCKCONV_INIT_RANGE(start, end) \
64+
const __m256i blconv_offset = _mm256_set1_epi8((signed char)(SCHAR_MIN - start)); \
65+
const __m256i blconv_threshold = _mm256_set1_epi8(SCHAR_MIN + (end - start) + 1);
66+
67+
#define BLOCKCONV_STRIDE sizeof(__m256i)
68+
69+
#define BLOCKCONV_INIT_DELTA(delta) \
70+
const __m256i blconv_delta = _mm256_set1_epi8(delta);
71+
72+
#define BLOCKCONV_LOAD(input) \
73+
__m256i blconv_operand = _mm256_loadu_si256((__m256i*)(input)); \
74+
__m256i blconv_mask = _mm256_cmpgt_epi8(blconv_threshold, _mm256_add_epi8(blconv_operand, blconv_offset));
75+
76+
#define BLOCKCONV_FOUND() _mm256_movemask_epi8(blconv_mask)
77+
78+
#define BLOCKCONV_STORE(dest) \
79+
__m256i blconv_add = _mm256_and_si256(blconv_mask, blconv_delta); \
80+
__m256i blconv_result = _mm256_add_epi8(blconv_operand, blconv_add); \
81+
_mm256_storeu_si256((__m256i*)(dest), blconv_result);
82+
83+
#elif __SSE2__
5884
#define HAVE_BLOCKCONV
5985

6086
/* Common code for SSE2 accelerated character case conversion */

‎ext/standard/tests/strings/strtoupper1.phpt

+17
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,11 @@ $strings = array (
2828
"zzzzzzzzzzzzzzzzzzzz",
2929
"````````````````````",
3030
"{{{{{{{{{{{{{{{{{{{{",
31+
/* And the AVX2 implementation also */
32+
"{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{",
33+
"abcdefghijklmnopqrstuvwxyz01234",
34+
"abcdefghijklmnopqrstuvwxyz012345",
35+
"abcdefghijklmnopqrstuvwxyz0123456",
3136
"abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
3237
);
3338

@@ -348,6 +353,18 @@ string(20) "````````````````````"
348353
string(20) "{{{{{{{{{{{{{{{{{{{{"
349354

350355
-- Iteration 12 --
356+
string(40) "{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{"
357+
358+
-- Iteration 13 --
359+
string(31) "ABCDEFGHIJKLMNOPQRSTUVWXYZ01234"
360+
361+
-- Iteration 14 --
362+
string(32) "ABCDEFGHIJKLMNOPQRSTUVWXYZ012345"
363+
364+
-- Iteration 15 --
365+
string(33) "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456"
366+
367+
-- Iteration 16 --
351368
string(62) "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
352369

353370
*** Testing strtoupper() with two different case strings ***

0 commit comments

Comments
 (0)
Please sign in to comment.