diff options
| author | Heikki Linnakangas | 2015-04-14 14:05:03 +0000 |
|---|---|---|
| committer | Heikki Linnakangas | 2015-04-14 14:05:03 +0000 |
| commit | 3dc2d62d0486325bf263655c2d9a96aee0b02abe (patch) | |
| tree | 47336185d9126f14d8a3943503706023d05fe4b7 /src/include | |
| parent | 4f700bcd20c087f60346cb8aefd0e269be8e2157 (diff) | |
Use Intel SSE 4.2 CRC instructions where available.
Modern x86 and x86-64 processors with SSE 4.2 support have special
instructions, crc32b and crc32q, for calculating CRC-32C. They greatly
speed up CRC calculation.
Whether the instructions can be used or not depends on the compiler and the
target architecture. If generation of SSE 4.2 instructions is allowed for
the target (-msse4.2 flag on gcc and clang), use them. If they are not
allowed by default, but the compiler supports the -msse4.2 flag to enable
them, compile just the CRC-32C function with -msse4.2 flag, and check at
runtime whether the processor we're running on supports it. If it doesn't,
fall back to the slicing-by-8 algorithm. (With the common defaults on
current operating systems, the runtime-check variant is what you get in
practice.)
Abhijit Menon-Sen, heavily modified by me, reviewed by Andres Freund.
Diffstat (limited to 'src/include')
| -rw-r--r-- | src/include/pg_config.h.in | 15 | ||||
| -rw-r--r-- | src/include/pg_config.h.win32 | 23 | ||||
| -rw-r--r-- | src/include/port/pg_crc32c.h | 44 |
3 files changed, 80 insertions, 2 deletions
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 202c51a34a5..5688f750af9 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -675,6 +675,12 @@ /* Define to 1 if your compiler understands __builtin_unreachable. */ #undef HAVE__BUILTIN_UNREACHABLE +/* Define to 1 if you have __cpuid. */ +#undef HAVE__CPUID + +/* Define to 1 if you have __get_cpuid. */ +#undef HAVE__GET_CPUID + /* Define to 1 if your compiler understands _Static_assert. */ #undef HAVE__STATIC_ASSERT @@ -818,6 +824,15 @@ /* Use replacement snprintf() functions. */ #undef USE_REPL_SNPRINTF +/* Define to 1 to use software CRC-32C implementation (slicing-by-8). */ +#undef USE_SLICING_BY_8_CRC32C + +/* Define to 1 to use Intel SSE 4.2 CRC instructions. */ +#undef USE_SSE42_CRC32C + +/* Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check. */ +#undef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK + /* Define to select SysV-style semaphores. */ #undef USE_SYSV_SEMAPHORES diff --git a/src/include/pg_config.h.win32 b/src/include/pg_config.h.win32 index 1baf64f0056..d9fa711ab57 100644 --- a/src/include/pg_config.h.win32 +++ b/src/include/pg_config.h.win32 @@ -6,8 +6,8 @@ * * HAVE_CBRT, HAVE_FUNCNAME_FUNC, HAVE_GETOPT, HAVE_GETOPT_H, HAVE_INTTYPES_H, * HAVE_GETOPT_LONG, HAVE_LOCALE_T, HAVE_RINT, HAVE_STRINGS_H, HAVE_STRTOLL, - * HAVE_STRTOULL, HAVE_STRUCT_OPTION, ENABLE_THREAD_SAFETY, - * PG_USE_INLINE, inline + * HAVE_STRTOULL, HAVE_STRUCT_OPTION, ENABLE_THREAD_SAFETY, PG_USE_INLINE, + * inline, USE_SSE42_CRC32C_WITH_RUNTIME_CHECK */ /* Define to the type of arg 1 of 'accept' */ @@ -529,6 +529,12 @@ /* Define to 1 if your compiler understands __builtin_unreachable. */ /* #undef HAVE__BUILTIN_UNREACHABLE */ +/* Define to 1 if you have __cpuid. */ +#define HAVE__CPUID 1 + +/* Define to 1 if you have __get_cpuid. */ +#undef HAVE__GET_CPUID + /* Define to 1 if your compiler understands _Static_assert. 
*/ /* #undef HAVE__STATIC_ASSERT */ @@ -639,6 +645,19 @@ /* Use replacement snprintf() functions. */ #define USE_REPL_SNPRINTF 1 +/* Define to 1 to use software CRC-32C implementation (slicing-by-8). */ +#if (_MSC_VER < 1500) +#define USE_SLICING_BY_8_CRC32C 1 +#endif + +/* Define to 1 to use Intel SSE 4.2 CRC instructions. */ +/* #undef USE_SSE42_CRC32C */ + +/* Define to 1 to use Intel SSE 4.2 CRC instructions with a runtime check. */ +#if (_MSC_VER >= 1500) +#define USE_SSE42_CRC32C_WITH_RUNTIME_CHECK +#endif + /* Define to select SysV-style semaphores. */ /* #undef USE_SYSV_SEMAPHORES */ diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h index d07c0cb623d..b14d194fb33 100644 --- a/src/include/port/pg_crc32c.h +++ b/src/include/port/pg_crc32c.h @@ -3,6 +3,25 @@ * pg_crc32c.h * Routines for computing CRC-32C checksums. * + * The speed of CRC-32C calculation has a big impact on performance, so we + * jump through some hoops to get the best implementation for each + * platform. Some CPU architectures have special instructions for speeding + * up CRC calculations (e.g. Intel SSE 4.2); on other platforms we use the + * Slicing-by-8 algorithm which uses lookup tables. + * + * The public interface consists of four macros: + * + * INIT_CRC32C(crc) + * Initialize a CRC accumulator + * + * COMP_CRC32C(crc, data, len) + * Accumulate some (more) bytes into a CRC + * + * FIN_CRC32C(crc) + * Finish a CRC calculation + * + * EQ_CRC32C(c1, c2) + * Check for equality of two CRCs. * * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -16,9 +35,32 @@ typedef uint32 pg_crc32c; +/* The INIT and EQ macros are the same for all implementations. */ #define INIT_CRC32C(crc) ((crc) = 0xFFFFFFFF) #define EQ_CRC32C(c1, c2) ((c1) == (c2)) +#if defined(USE_SSE42_CRC32C) +/* Use SSE4.2 instructions. 
*/ +#define COMP_CRC32C(crc, data, len) \ + ((crc) = pg_comp_crc32c_sse42((crc), (data), (len))) +#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) + +extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); + +#elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) +/* + * Use SSE4.2 instructions, but perform a runtime check first to check that + * they are available. + */ +#define COMP_CRC32C(crc, data, len) \ + ((crc) = pg_comp_crc32c((crc), (data), (len))) +#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) + +extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); +extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len); +extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len); + +#else /* * Use slicing-by-8 algorithm. * @@ -46,4 +88,6 @@ typedef uint32 pg_crc32c; extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len); +#endif + #endif /* PG_CRC32C_H */ |
