diff --git a/Makefile b/Makefile index 3ffd593b..cf94286c 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ # ################################################################ # xxHash Makefile -# Copyright (C) 2012-2021 Yann Collet +# Copyright (C) 2012-2024 Yann Collet # # GPL v2 License # @@ -55,6 +55,13 @@ else EXT = endif +# automatically enable runtime vector dispatch on x86/64 targets +detect_x86_arch = $(shell $(CC) -dumpmachine | grep -E 'i[3-6]86|x86_64') +ifneq ($(strip $(call detect_x86_arch)),) + #note: can be overridden at compile time, by setting DISPATCH=0 + DISPATCH ?= 1 +endif + ifeq ($(NODE_JS),1) # Link in unrestricted filesystem support LDFLAGS += -sNODERAWFS diff --git a/README.md b/README.md index 7a2169de..de0ece67 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,7 @@ The following macros can be set at compilation time to modify `libxxhash`'s beha #### Makefile variables When compiling the Command Line Interface `xxhsum` using `make`, the following environment variables can also be set : -- `DISPATCH=1` : use `xxh_x86dispatch.c`, to automatically select between `scalar`, `sse2`, `avx2` or `avx512` instruction set _at runtime_, depending on local host. This option is only valid for `x86`/`x64` systems. +- `DISPATCH=1` : use `xxh_x86dispatch.c` to select, _at runtime_, between the `scalar`, `sse2`, `avx2` and `avx512` instruction sets, depending on the local host. This option is only valid for `x86`/`x64` systems. It is enabled by default when an `x86`/`x64` target is detected, and can be forcibly turned off by setting `DISPATCH=0`. - `XXH_1ST_SPEED_TARGET` : select an initial speed target, expressed in MB/s, for the first speed test in benchmark mode. Benchmark will adjust the target at subsequent iterations, but the first test is made "blindly" by targeting this speed. Currently conservatively set to 10 MB/s, to support very slow (emulated) platforms. 
- `NODE_JS=1` : When compiling `xxhsum` for Node.js with Emscripten, this links the `NODERAWFS` library for unrestricted filesystem access and patches `isatty` to make the command line utility correctly detect the terminal. This does make the binary specific to Node.js. diff --git a/cli/xsum_arch.c b/cli/xsum_arch.c index 1533f585..3540af48 100644 --- a/cli/xsum_arch.c +++ b/cli/xsum_arch.c @@ -1,6 +1,6 @@ /* * xxhsum - Command line interface for xxhash algorithms - * Copyright (C) 2013-2021 Yann Collet + * Copyright (C) 2013-2024 Yann Collet * * GPL v2 License * @@ -29,8 +29,6 @@ int g_xsumarch_avoid_empty_unit = 0; #if defined(XXHSUM_DISPATCH) #include "../xxh_x86dispatch.h" -#define XXH_INLINE_ALL /* XXH_* vector types */ -#include "../xxhash.h" const char* XSUM_autox86(void) { diff --git a/xxh_x86dispatch.c b/xxh_x86dispatch.c index 430df13e..03e7dc41 100644 --- a/xxh_x86dispatch.c +++ b/xxh_x86dispatch.c @@ -40,9 +40,9 @@ * * Optional add-on. * - * **Compile this file with the default flags for your target.** Do not compile - * with flags like `-mavx*`, `-march=native`, or `/arch:AVX*`, there will be - * an error. See @ref XXH_X86DISPATCH_ALLOW_AVX for details. + * **Compile this file with the default flags for your target.** + * Note that compiling with flags like `-mavx*`, `-march=native`, or `/arch:AVX*` + * makes the resulting binary incompatible with CPUs that do not support the requested instruction set. * * @defgroup dispatch x86 Dispatcher * @{ @@ -70,34 +70,6 @@ extern "C" { #endif /*! @endcond */ -/*! - * @def XXH_X86DISPATCH_ALLOW_AVX - * @brief Disables the AVX sanity check. - * - * xxh_x86dispatch.c is intended to be compiled for the minimum target, and - * it selectively enables SSE2, AVX2, and AVX512 when it is needed. - * - * Compiling with options like `-mavx*`, `-march=native`, or `/arch:AVX*` - * _globally_ will always enable this feature, and therefore makes it - * undefined behavior to execute on any CPU without said feature. 
- * - * Even if the source code isn't directly using AVX intrinsics in a function, - * the compiler can still generate AVX code from autovectorization and by - * "upgrading" SSE2 intrinsics to use the VEX prefixes (a.k.a. AVX128). - * - * Define XXH_X86DISPATCH_ALLOW_AVX to ignore this check, - * thus accepting that the produced binary will not work correctly - * on any CPU with less features than the ones stated at compilation time. - */ -#ifdef XXH_DOXYGEN -# define XXH_X86DISPATCH_ALLOW_AVX -#endif - -#if defined(__AVX__) && !defined(XXH_X86DISPATCH_ALLOW_AVX) -# error "Error: if xxh_x86dispatch.c is compiled with AVX enabled, the resulting binary will crash on sse2-only cpus !! " \ - "If you nonetheless want to do that, please enable the XXH_X86DISPATCH_ALLOW_AVX build variable" -#endif - /*! * @def XXH_DISPATCH_SCALAR * @brief Enables/dispatching the scalar code path. diff --git a/xxh_x86dispatch.h b/xxh_x86dispatch.h index 13679b70..70852215 100644 --- a/xxh_x86dispatch.h +++ b/xxh_x86dispatch.h @@ -1,6 +1,6 @@ /* * xxHash - XXH3 Dispatcher for x86-based targets - * Copyright (C) 2020-2021 Yann Collet + * Copyright (C) 2020-2024 Yann Collet * * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) * diff --git a/xxhash.h b/xxhash.h index cb8d3f58..f7d59276 100644 --- a/xxhash.h +++ b/xxhash.h @@ -1110,6 +1110,23 @@ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const * * The API supports one-shot hashing, streaming mode, and custom secrets. */ + +/*! + * @ingroup tuning + * @brief Possible values for @ref XXH_VECTOR. + * + * Unless set explicitly, determined automatically. + */ +# define XXH_SCALAR 0 /*!< Portable scalar version */ +# define XXH_SSE2 1 /*!< SSE2 for Pentium 4, Opteron, all x86_64. 
*/ +# define XXH_AVX2 2 /*!< AVX2 for Haswell and Bulldozer */ +# define XXH_AVX512 3 /*!< AVX512 for Skylake and Icelake */ +# define XXH_NEON 4 /*!< NEON for most ARMv7-A, all AArch64, and WASM SIMD128 */ +# define XXH_VSX 5 /*!< VSX and ZVector for POWER8/z13 (64-bit) */ +# define XXH_SVE 6 /*!< SVE for some ARMv8-A and ARMv9-A */ +# define XXH_LSX 7 /*!< LSX (128-bit SIMD) for LoongArch64 */ + + /*-********************************************************************** * XXH3 64-bit variant ************************************************************************/ @@ -3853,34 +3870,6 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_can * implementation. */ # define XXH_VECTOR XXH_SCALAR -/*! - * @ingroup tuning - * @brief Possible values for @ref XXH_VECTOR. - * - * Note that these are actually implemented as macros. - * - * If this is not defined, it is detected automatically. - * internal macro XXH_X86DISPATCH overrides this. - */ -enum XXH_VECTOR_TYPE /* fake enum */ { - XXH_SCALAR = 0, /*!< Portable scalar version */ - XXH_SSE2 = 1, /*!< - * SSE2 for Pentium 4, Opteron, all x86_64. - * - * @note SSE2 is also guaranteed on Windows 10, macOS, and - * Android x86. - */ - XXH_AVX2 = 2, /*!< AVX2 for Haswell and Bulldozer */ - XXH_AVX512 = 3, /*!< AVX512 for Skylake and Icelake */ - XXH_NEON = 4, /*!< - * NEON for most ARMv7-A, all AArch64, and WASM SIMD128 - * via the SIMDeverywhere polyfill provided with the - * Emscripten SDK. - */ - XXH_VSX = 5, /*!< VSX and ZVector for POWER8/z13 (64-bit) */ - XXH_SVE = 6, /*!< SVE for some ARMv8-A and ARMv9-A */ - XXH_LSX = 7, /*!< LSX (128-bit SIMD) for LoongArch64 */ -}; /*! * @ingroup tuning * @brief Selects the minimum alignment for XXH3's accumulators. 
@@ -3895,14 +3884,6 @@ enum XXH_VECTOR_TYPE /* fake enum */ { /* Actual definition */ #ifndef XXH_DOXYGEN -# define XXH_SCALAR 0 -# define XXH_SSE2 1 -# define XXH_AVX2 2 -# define XXH_AVX512 3 -# define XXH_NEON 4 -# define XXH_VSX 5 -# define XXH_SVE 6 -# define XXH_LSX 7 #endif #ifndef XXH_VECTOR /* can be defined on command line */