diff options
author | Nathan Bossart | 2025-03-28 21:20:20 +0000 |
---|---|---|
committer | Nathan Bossart | 2025-03-28 21:20:20 +0000 |
commit | 519338ace410d9b1ffb13176b8802b0307ff0531 (patch) | |
tree | cef689c0b92e9678b1b5cf0110b0ba3a37c8ebe0 /configure.ac | |
parent | 3c8e463b0d885e0d976f6a13a1fb78187b25c86f (diff) |
Optimize popcount functions with ARM SVE intrinsics.
This commit introduces SVE implementations of pg_popcount{32,64}.
Unlike the Neon versions, we need an additional configure-time
check to determine if the compiler supports SVE intrinsics, and we
need a runtime check to determine if the current CPU supports SVE
instructions. Our testing showed that the SVE implementations are
much faster for larger inputs and are comparable to the status
quo for smaller inputs.
Author: "[email protected]" <[email protected]>
Co-authored-by: "[email protected]" <[email protected]>
Co-authored-by: "Malladi, Rama" <[email protected]>
Reviewed-by: John Naylor <[email protected]>
Reviewed-by: Kirill Reshke <[email protected]>
Discussion: https://postgr.es/m/010101936e4aaa70-b474ab9e-b9ce-474d-a3ba-a3dc223d295c-000000%40us-west-2.amazonses.com
Discussion: https://postgr.es/m/OSZPR01MB84990A9A02A3515C6E85A65B8B2A2%40OSZPR01MB8499.jpnprd01.prod.outlook.com
Diffstat (limited to 'configure.ac')
-rw-r--r-- | configure.ac | 9 |
1 file changed, 9 insertions, 0 deletions
```diff
diff --git a/configure.ac b/configure.ac
index 537e654e7b3..25cdfcf65af 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2070,6 +2070,15 @@ if test x"$host_cpu" = x"x86_64"; then
   fi
 fi
 
+# Check for SVE popcount intrinsics
+#
+if test x"$host_cpu" = x"aarch64"; then
+  PGAC_SVE_POPCNT_INTRINSICS()
+  if test x"$pgac_sve_popcnt_intrinsics" = x"yes"; then
+    AC_DEFINE(USE_SVE_POPCNT_WITH_RUNTIME_CHECK, 1, [Define to 1 to use SVE popcount instructions with a runtime check.])
+  fi
+fi
+
 # Check for Intel SSE 4.2 intrinsics to do CRC calculations.
 #
 PGAC_SSE42_CRC32_INTRINSICS()
```