From a230bd80e8e596f69323c8e8c108dbb80fa3c683 Mon Sep 17 00:00:00 2001 From: Filip Krikava Date: Tue, 3 Dec 2024 14:11:56 +0000 Subject: [PATCH] Fixes --- .gitignore | 2 + .idea/misc.xml | 2 +- client/rsh/Makefile | 11 +- client/rsh/inst/xxhash.hpp | 2226 ----------------- client/rsh/src/Makevars | 8 +- client/rsh/src/client.cpp | 125 +- client/rsh/src/client.hpp | 28 +- client/rsh/src/compiler.cpp | 76 +- client/rsh/src/compiler.hpp | 2 +- client/rsh/src/init.cpp | 2 +- client/rsh/test6.R | 44 - external/R | 2 +- .../org/prlprg/server/CompileService.java | 5 +- 13 files changed, 133 insertions(+), 2400 deletions(-) delete mode 100644 client/rsh/inst/xxhash.hpp delete mode 100644 client/rsh/test6.R diff --git a/.gitignore b/.gitignore index 76049fcd..13213824 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,5 @@ compile_commands.json ### IntelliJ IDEA ### /.idea/uiDesigner.xml +.idea/modules.xml +.idea/workspace.iml diff --git a/.idea/misc.xml b/.idea/misc.xml index 09665c35..953ce19e 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -57,7 +57,7 @@ - + \ No newline at end of file diff --git a/client/rsh/Makefile b/client/rsh/Makefile index 7b1d039c..8d4ccc30 100644 --- a/client/rsh/Makefile +++ b/client/rsh/Makefile @@ -28,12 +28,12 @@ build: install: $(LLVM_R) CMD INSTALL --install-tests --use-LTO . -.PHONY: dependencies -dependencies: +.PHONY: setup +setup: $(R) -e 'install.packages(c("microbenchmark"), repos="https://cloud.r-project.org", Ncpus=4)' -.PHONY: setup -setup: dependencies +.PHONY: compile_commands.json +compile_commands.json: $(BEAR) -- $(MAKE) clean install TEST_DIR = $(RSH_HOME)/tests @@ -82,6 +82,3 @@ benchmark: echo "$(TXT_RED)FAIL$(NO_NORM) $$exit$(TXT_NORM)"; \ fi; \ done - -test6: - $(LLVM_R) -f test6.R \ No newline at end of file diff --git a/client/rsh/inst/xxhash.hpp b/client/rsh/inst/xxhash.hpp deleted file mode 100644 index 88645484..00000000 --- a/client/rsh/inst/xxhash.hpp +++ /dev/null @@ -1,2226 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include - -/* -xxHash - Extremely Fast Hash algorithm -Header File -Copyright (C) 2012-2024, Yann Collet. -Copyright (C) 2017-2024, Red Gavin. -All rights reserved. - -BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: -* Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -You can contact the author at : -- xxHash source repository : https://github.com/Cyan4973/xxHash -- xxHash C++ port repository : https://github.com/RedSpah/xxhash_cpp -*/ - -/* Intrinsics -* Sadly has to be included in the global namespace or literally everything breaks -*/ -#if (defined(__ARM_NEON) && defined(__APPLE__)) -#include "sse2neon.h" -#else -#include -#endif - -namespace xxh -{ - /* ************************************* - * Versioning - ***************************************/ - - namespace version - { - constexpr int cpp_version_major = 0; - constexpr int cpp_version_minor = 8; - constexpr int cpp_version_release = 1; - } - - constexpr uint32_t version_number() - { - return version::cpp_version_major * 10000 + version::cpp_version_minor * 100 + version::cpp_version_release; - } - - - /* ************************************* - * Basic Types - Predefining uint128_t for intrin - ***************************************/ - - namespace typedefs - { - struct alignas(16) uint128_t - { - uint64_t low64 = 0; - uint64_t high64 = 0; - - bool operator==(const uint128_t & other) - { - return (low64 == other.low64 && high64 == other.high64); - } - - bool operator>(const uint128_t & other) - { - return (high64 > other.high64 || low64 > other.low64); - } - - bool operator>=(const uint128_t & other) - { - return (*this > other || *this == other); - } - - bool operator<(const uint128_t & other) - { - return !(*this >= other); - } - - bool operator<=(const uint128_t & other) - { - return !(*this > other); - } - - bool operator!=(const uint128_t & other) - { - return !(*this == other); - } - - uint128_t(uint64_t low, uint64_t high) : low64(low), high64(high) {} - - uint128_t() {} - }; - - } - - using uint128_t = typedefs::uint128_t; - - - /* ************************************* - * Compiler / Platform Specific Features - ***************************************/ - - namespace intrin - { - /*!XXH_CPU_LITTLE_ENDIAN : - * This is a CPU endian detection macro, will be - * automatically set to 1 (little endian) if it is left undefined. - * If compiling for a big endian system (why), XXH_CPU_LITTLE_ENDIAN has to be explicitly defined as 0. - */ -#ifndef XXH_CPU_LITTLE_ENDIAN -# define XXH_CPU_LITTLE_ENDIAN 1 -#endif - - - /* Vectorization Detection - * NOTE: XXH_NEON and XXH_VSX aren't supported in this C++ port. - * The primary reason is that I don't have access to an ARM and PowerPC - * machines to test them, and the secondary reason is that I even doubt anyone writing - * code for such machines would bother using a C++ port rather than the original C version. - */ -#ifndef XXH_VECTOR /* can be predefined on command line */ -# if defined(__AVX512F__) -# define XXH_VECTOR 3 /* AVX512 for Skylake and Icelake */ -# elif defined(__AVX2__) -# define XXH_VECTOR 2 /* AVX2 for Haswell and Bulldozer */ -# elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2)) -# define XXH_VECTOR 1 /* SSE2 for Pentium 4 and all x86_64 */ -# else -# define XXH_VECTOR 0 /* Portable scalar version */ -# endif -#endif - - constexpr int vector_mode = XXH_VECTOR; - -#if XXH_VECTOR == 3 /* AVX512 for Skylake and Icelake */ - constexpr int acc_align = 64; - using avx512_underlying = __m512i; - using avx2_underlying = __m256i; - using sse2_underlying = __m128i; -#elif XXH_VECTOR == 2 /* AVX2 for Haswell and Bulldozer */ - constexpr int acc_align = 32; - using avx512_underlying = void; - using avx2_underlying = __m256i; - using sse2_underlying = __m128i; -#elif XXH_VECTOR == 1 /* SSE2 for Pentium 4 and all x86_64 */ - using avx512_underlying = void; - using avx2_underlying = void; //std::array<__m128i, 2>; - using sse2_underlying = __m128i; - constexpr int acc_align = 16; -#else /* Portable scalar version */ - using avx512_underlying = void; - using avx2_underlying = void; //std::array; - using sse2_underlying = void; //std::array; - constexpr int acc_align = 8; -#endif - - - /* Compiler Specifics - * Defines inline macros and includes specific compiler's instrinsics. - * */ -#ifdef XXH_FORCE_INLINE /* First undefining the symbols in case they're already defined */ -# undef XXH_FORCE_INLINE -#endif -#ifdef XXH_NO_INLINE -# undef XXH_NO_INLINE -#endif - -#ifdef _MSC_VER /* Visual Studio */ -# pragma warning(disable : 4127) -# define XXH_FORCE_INLINE static __forceinline -# define XXH_NO_INLINE static __declspec(noinline) -# include -#elif defined(__GNUC__) /* Clang / GCC */ -# define XXH_FORCE_INLINE static inline __attribute__((always_inline)) -# define XXH_NO_INLINE static __attribute__((noinline)) -#if (defined(__ARM_NEON) && defined(__APPLE__)) -# include "sse2neon.h" -# else -# include -# endif -#else -# define XXH_FORCE_INLINE static inline -# define XXH_NO_INLINE static -#endif - - - /* Prefetch - * Can be disabled by defining XXH_NO_PREFETCH - */ -#if defined(XXH_NO_PREFETCH) - XXH_FORCE_INLINE void prefetch(const void* ptr) {} -#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) - XXH_FORCE_INLINE void prefetch(const void* ptr) { _mm_prefetch((const char*)(ptr), _MM_HINT_T0); } -#elif defined(__GNUC__) - XXH_FORCE_INLINE void prefetch(const void* ptr) { __builtin_prefetch((ptr), 0, 3); } -#else - XXH_FORCE_INLINE void prefetch(const void* ptr) {} -#endif - - - /* Restrict - * Defines macro for restrict, which in C++ is sadly just a compiler extension (for now). - * Can be disabled by defining XXH_NO_RESTRICT - */ -#ifdef XXH_RESTRICT -# undef XXH_RESTRICT -#endif - -#if (defined(__GNUC__) || defined(_MSC_VER)) && defined(__cplusplus) && !defined(XXH_NO_RESTRICT) -# define XXH_RESTRICT __restrict -#else -# define XXH_RESTRICT -#endif - - - /* Likely / Unlikely - * Defines macros for Likely / Unlikely, which are official in C++20, but sadly this library aims the previous standard. - * Not present on MSVC. - * Can be disabled by defining XXH_NO_BRANCH_HINTS - */ -#if ((defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)) && !defined(XXH_NO_BRANCH_HINTS) -# define XXH_likely(x) __builtin_expect(x, 1) -# define XXH_unlikely(x) __builtin_expect(x, 0) -#else -# define XXH_likely(x) (x) -# define XXH_unlikely(x) (x) -#endif - - - namespace bit_ops - { -#if defined(_MSC_VER) - static inline uint32_t rotl32(uint32_t x, int32_t r) { return _rotl(x, r); } - static inline uint64_t rotl64(uint64_t x, int32_t r) { return _rotl64(x, r); } - static inline uint32_t rotr32(uint32_t x, int32_t r) { return _rotr(x, r); } - static inline uint64_t rotr64(uint64_t x, int32_t r) { return _rotr64(x, r); } -#else - static inline uint32_t rotl32(uint32_t x, int32_t r) { return ((x << r) | (x >> (32 - r))); } - static inline uint64_t rotl64(uint64_t x, int32_t r) { return ((x << r) | (x >> (64 - r))); } - static inline uint32_t rotr32(uint32_t x, int32_t r) { return ((x >> r) | (x << (32 - r))); } - static inline uint64_t rotr64(uint64_t x, int32_t r) { return ((x >> r) | (x << (64 - r))); } -#endif - - -#if defined(_MSC_VER) /* Visual Studio */ - static inline uint32_t swap32(uint32_t x) { return _byteswap_ulong(x); } - static inline uint64_t swap64(uint64_t x) { return _byteswap_uint64(x); } -#elif defined(__GNUC__) - static inline uint32_t swap32(uint32_t x) { return __builtin_bswap32(x); } - static inline uint64_t swap64(uint64_t x) { return __builtin_bswap64(x); } -#else - static inline uint32_t swap32(uint32_t x) { return ((x << 24) & 0xff000000) | ((x << 8) & 0x00ff0000) | ((x >> 8) & 0x0000ff00) | ((x >> 24) & 0x000000ff); } - static inline uint64_t swap64(uint64_t x) { return ((x << 56) & 0xff00000000000000ULL) | ((x << 40) & 0x00ff000000000000ULL) | ((x << 24) & 0x0000ff0000000000ULL) | ((x << 8) & 0x000000ff00000000ULL) | ((x >> 8) & 0x00000000ff000000ULL) | ((x >> 24) & 0x0000000000ff0000ULL) | ((x >> 40) & 0x000000000000ff00ULL) | ((x >> 56) & 0x00000000000000ffULL); } -#endif - - -#if defined(_MSC_VER) && defined(_M_IX86) // Only for 32-bit MSVC. - XXH_FORCE_INLINE uint64_t mult32to64(uint32_t x, uint32_t y) { return __emulu(x, y); } -#else - XXH_FORCE_INLINE uint64_t mult32to64(uint32_t x, uint32_t y) { return (uint64_t)(uint32_t)(x) * (uint64_t)(uint32_t)(y); } -#endif - - -#if defined(__GNUC__) && !defined(__clang__) && defined(__i386__) - __attribute__((__target__("no-sse"))) -#endif - static inline uint128_t mult64to128(uint64_t lhs, uint64_t rhs) - { - -#if defined(__GNUC__) && !defined(__wasm__) \ - && defined(__SIZEOF_INT128__) \ - || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) - - __uint128_t product = (__uint128_t)lhs * (__uint128_t)rhs; - uint128_t r128; - r128.low64 = (uint64_t)(product); - r128.high64 = (uint64_t)(product >> 64); - return r128; - -#elif defined(_M_X64) || defined(_M_IA64) - -#ifndef _MSC_VER -# pragma intrinsic(_umul128) -#endif - uint64_t product_high; - uint64_t const product_low = _umul128(lhs, rhs, &product_high); - uint128_t r128; - r128.low64 = product_low; - r128.high64 = product_high; - return r128; - -#else - uint64_t const lo_lo = bit_ops::mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF); - uint64_t const hi_lo = bit_ops::mult32to64(lhs >> 32, rhs & 0xFFFFFFFF); - uint64_t const lo_hi = bit_ops::mult32to64(lhs & 0xFFFFFFFF, rhs >> 32); - uint64_t const hi_hi = bit_ops::mult32to64(lhs >> 32, rhs >> 32); - - /* Now add the products together. These will never overflow. */ - uint64_t const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; - uint64_t const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; - uint64_t const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF); - - uint128_t r128; - r128.low64 = lower; - r128.high64 = upper; - return r128; -#endif - } - } - } - - - /* ************************************* - * Basic Types - Everything else - ***************************************/ - - namespace typedefs - { - /* ************************************* - * Basic Types - Detail - ***************************************/ - - template - struct hash_type - { - using type = void; - }; - - template <> - struct hash_type<32> - { - using type = uint32_t; - }; - - template <> - struct hash_type<64> - { - using type = uint64_t; - }; - - template <> - struct hash_type<128> - { - using type = uint128_t; - }; - - - template - struct vec_type - { - using type = void; - }; - - template <> - struct vec_type<64> - { - using type = uint64_t; - }; - - template <> - struct vec_type<128> - { - using type = intrin::sse2_underlying; - }; - - template <> - struct vec_type<256> - { - using type = intrin::avx2_underlying; - }; - - template <> - struct vec_type<512> - { - using type = intrin::avx512_underlying; - }; - - /* Rationale - * On the surface level uint_type appears to be pointless, - * as it is just a copy of hash_type. They do use the same types, - * that is true, but the reasoning for the difference is aimed at humans, - * not the compiler, as a difference between values that are 'just' numbers, - * and those that represent actual hash values. - */ - template - struct uint_type - { - using type = void; - }; - - template <> - struct uint_type<32> - { - using type = uint32_t; - }; - - template <> - struct uint_type<64> - { - using type = uint64_t; - }; - - template <> - struct uint_type<128> - { - using type = uint128_t; - }; - } - - template - using hash_t = typename typedefs::hash_type::type; - using hash32_t = hash_t<32>; - using hash64_t = hash_t<64>; - using hash128_t = hash_t<128>; - - template - using vec_t = typename typedefs::vec_type::type; - using vec64_t = vec_t<64>; - using vec128_t = vec_t<128>; - using vec256_t = vec_t<256>; - using vec512_t = vec_t<512>; - - template - using uint_t = typename typedefs::uint_type::type; - - - - /* ************************************* - * Bit Operations - ***************************************/ - - namespace bit_ops - { - /* **************************************** - * Bit Operations - ******************************************/ - - template - static inline uint_t rotl(uint_t n, int32_t r) - { - if constexpr (N == 32) - { - return intrin::bit_ops::rotl32(n, r); - } - - if constexpr (N == 64) - { - return intrin::bit_ops::rotl64(n, r); - } - } - - template - static inline uint_t rotr(uint_t n, int32_t r) - { - if constexpr (N == 32) - { - return intrin::bit_ops::rotr32(n, r); - } - - if constexpr (N == 64) - { - return intrin::bit_ops::rotr64(n, r); - } - } - - template - static inline uint_t swap(uint_t n) - { - if constexpr (N == 32) - { - return intrin::bit_ops::swap32(n); - } - - if constexpr (N == 64) - { - return intrin::bit_ops::swap64(n); - } - } - - template - static inline vec_t mul32to64(vec_t x, vec_t y) - { - if constexpr (N == 64) - { - return intrin::bit_ops::mult32to64(static_cast(x), static_cast(y)); - } - else - { - return 0; - } - } - - static inline uint128_t mul64to128(uint64_t x, uint64_t y) - { - return intrin::bit_ops::mult64to128(x, y); - } - - static inline uint64_t mul128fold64(uint64_t x, uint64_t y) - { - uint128_t product = mul64to128(x, y); - - return (product.low64 ^ product.high64); - } - } - - - /* ************************************* - * Memory Functions - ***************************************/ - - namespace mem_ops - { - - /* ************************************* - * Endianness - ***************************************/ - - constexpr bool is_little_endian() - { - return (XXH_CPU_LITTLE_ENDIAN == 1); - } - - - /* ************************************* - * Memory Access - ***************************************/ - - template - static inline uint_t read(const void* memPtr) - { - uint_t val; - - memcpy(&val, memPtr, sizeof(val)); - return val; - } - - template - static inline uint_t readLE(const void* ptr) - { - if constexpr (is_little_endian()) - { - return read(ptr); - } - else - { - return bit_ops::swap(read(ptr)); - } - } - - template - static inline uint_t readBE(const void* ptr) - { - if constexpr (is_little_endian()) - { - return bit_ops::swap(read(ptr)); - } - else - { - return read(ptr); - } - } - - template - static void writeLE(void* dst, uint_t v) - { - if constexpr (!is_little_endian()) - { - v = bit_ops::swap(v); - } - - memcpy(dst, &v, sizeof(v)); - } - } - - - /* ************************************* - * Vector Functions - ***************************************/ - - namespace vec_ops - { - template - XXH_FORCE_INLINE vec_t loadu(const vec_t* input) - { - static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid template argument passed to xxh::vec_ops::loadu"); - - if constexpr (N == 128) - { - return _mm_loadu_si128(input); - } - - if constexpr (N == 256) - { - return _mm256_loadu_si256(input); - } - - if constexpr (N == 512) - { - return _mm512_loadu_si512(input); - } - - if constexpr (N == 64) - { - return mem_ops::readLE<64>(input); - } - - } - - - // 'xorv' instead of 'xor' because 'xor' is a weird wacky alternate operator expression thing. - template - XXH_FORCE_INLINE vec_t xorv(vec_t a, vec_t b) - { - static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid argument passed to xxh::vec_ops::xorv"); - - if constexpr (N == 128) - { - return _mm_xor_si128(a, b); - } - - if constexpr (N == 256) - { - return _mm256_xor_si256(a, b); - } - - if constexpr (N == 512) - { - return _mm512_xor_si512(a, b); - } - - if constexpr (N == 64) - { - return a ^ b; - } - } - - - template - XXH_FORCE_INLINE vec_t mul(vec_t a, vec_t b) - { - static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid argument passed to xxh::vec_ops::mul"); - - if constexpr (N == 128) - { - return _mm_mul_epu32(a, b); - } - - if constexpr (N == 256) - { - return _mm256_mul_epu32(a, b); - } - - if constexpr (N == 512) - { - return _mm512_mul_epu32(a, b); - } - - if constexpr (N == 64) - { - return a * b; - } - } - - - template - XXH_FORCE_INLINE vec_t add(vec_t a, vec_t b) - { - static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid argument passed to xxh::vec_ops::add"); - - if constexpr (N == 128) - { - return _mm_add_epi64(a, b); - } - - if constexpr (N == 256) - { - return _mm256_add_epi64(a, b); - } - - if constexpr (N == 512) - { - return _mm512_add_epi64(a, b); - } - - if constexpr (N == 64) - { - return a + b; - } - } - - - template - XXH_FORCE_INLINE vec_t shuffle(vec_t a) - { - static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid argument passed to xxh::vec_ops::shuffle"); - - if constexpr (N == 128) - { - return _mm_shuffle_epi32(a, _MM_SHUFFLE(S1, S2, S3, S4)); - } - - if constexpr (N == 256) - { - return _mm256_shuffle_epi32(a, _MM_SHUFFLE(S1, S2, S3, S4)); - } - - if constexpr (N == 512) - { - return _mm512_shuffle_epi32(a, _MM_SHUFFLE(S1, S2, S3, S4)); - } - - if constexpr (N == 64) - { - return a; - } - } - - - template - XXH_FORCE_INLINE vec_t set1(int64_t a) - { - -#if (defined(__ARM_NEON) && defined(__APPLE__)) - static_assert(!(N != 128 && N != 64), "Invalid argument passed to xxh::vec_ops::set1"); -#else - static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid argument passed to xxh::vec_ops::set1"); - if constexpr (N == 256) - { - return _mm256_set1_epi32(static_cast(a)); - } - - if constexpr (N == 512) - { - return _mm512_set1_epi32(static_cast(a)); - } -#endif - - if constexpr (N == 128) - { - return _mm_set1_epi32(static_cast(a)); - } - - if constexpr (N == 64) - { - return a; - } - } - - - template - XXH_FORCE_INLINE vec_t srli(vec_t n, int a) - { - static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid argument passed to xxh::vec_ops::srli"); - - if constexpr (N == 128) - { - return _mm_srli_epi64(n, a); - } - - if constexpr (N == 256) - { - return _mm256_srli_epi64(n, a); - } - - if constexpr (N == 512) - { - return _mm512_srli_epi64(n, a); - } - - if constexpr (N == 64) - { - return n >> a; - } - } - - - template - XXH_FORCE_INLINE vec_t slli(vec_t n, int a) - { - static_assert(!(N != 128 && N != 256 && N != 64 && N != 512), "Invalid argument passed to xxh::vec_ops::slli"); - - if constexpr (N == 128) - { - return _mm_slli_epi64(n, a); - } - - if constexpr (N == 256) - { - return _mm256_slli_epi64(n, a); - } - - if constexpr (N == 512) - { - return _mm512_slli_epi64(n, a); - } - - if constexpr (N == 64) - { - return n << a; - } - } - } - - /* ************************************* - * Canonical represenation - ***************************************/ - - template - struct canonical_t - { - std::array digest{ 0 }; - - canonical_t(hash_t hash) - { - if constexpr (bit_mode < 128) - { - if (mem_ops::is_little_endian()) - { - hash = bit_ops::swap(hash); - } - - memcpy(digest.data(), &hash, sizeof(canonical_t)); - } - else - { - if (mem_ops::is_little_endian()) - { - hash.low64 = bit_ops::swap<64>(hash.low64); - hash.high64 = bit_ops::swap<64>(hash.high64); - } - - memcpy(digest.data(), &hash.high64, sizeof(hash.high64)); - memcpy(digest.data() + sizeof(hash.high64), &hash.low64, sizeof(hash.low64)); - } - } - - hash_t get_hash() const - { - if constexpr (bit_mode < 128) - { - return mem_ops::readBE(&digest); - } - else - { - return { mem_ops::readBE<64>(&digest[8]), mem_ops::readBE<64>(&digest) }; - } - } - }; - - using canonical32_t = canonical_t<32>; - using canonical64_t = canonical_t<64>; - using canonical128_t = canonical_t<128>; - - template - inline hash_t to_canonical(hash_t hash) - { - static_assert(!(bit_mode != 128 && bit_mode != 64 && bit_mode != 32), "Canonical form can only be obtained from 32, 64 and 128 bit hashes."); - canonical_t canon(hash); - hash_t res; - memcpy(&res, &canon, bit_mode / 8); - - return res; - } - - - /* ************************************* - * Algorithm Implementation - xxhash - ***************************************/ - - namespace detail - { - using namespace mem_ops; - using namespace bit_ops; - - - /* ************************************* - * Constants - ***************************************/ - - constexpr static std::array primes32 = { 2654435761U, 2246822519U, 3266489917U, 668265263U, 374761393U }; - constexpr static std::array primes64 = { 11400714785074694791ULL, 14029467366897019727ULL, 1609587929392839161ULL, 9650029242287828579ULL, 2870177450012600261ULL }; - - template - constexpr uint_t PRIME(uint64_t n) - { - if constexpr (N == 32) - { - return primes32[n - 1]; - } - else - { - return primes64[n - 1]; - } - } - - - /* ************************************* - * Functions - ***************************************/ - - template - XXH_FORCE_INLINE uint_t avalanche(uint_t hash) - { - if constexpr (N == 32) - { - hash ^= hash >> 15; - hash *= PRIME<32>(2); - hash ^= hash >> 13; - hash *= PRIME<32>(3); - hash ^= hash >> 16; - return hash; - } - else if constexpr (N == 64) - { - hash ^= hash >> 33; - hash *= PRIME<64>(2); - hash ^= hash >> 29; - hash *= PRIME<64>(3); - hash ^= hash >> 32; - return hash; - } - else return 0; - } - - template - XXH_FORCE_INLINE uint_t round(uint_t seed, uint_t input) - { - seed += input * PRIME(2); - - if constexpr (N == 32) - { - seed = rotl(seed, 13); - } - else - { - seed = rotl(seed, 31); - } - - seed *= PRIME(1); - return seed; - } - - XXH_FORCE_INLINE uint64_t mergeRound64(hash64_t acc, uint64_t val) - { - val = round<64>(0, val); - acc ^= val; - acc = acc * PRIME<64>(1) + PRIME<64>(4); - return acc; - } - - XXH_FORCE_INLINE void endian_align_sub_mergeround(hash64_t& hash_ret, uint64_t v1, uint64_t v2, uint64_t v3, uint64_t v4) - { - hash_ret = mergeRound64(hash_ret, v1); - hash_ret = mergeRound64(hash_ret, v2); - hash_ret = mergeRound64(hash_ret, v3); - hash_ret = mergeRound64(hash_ret, v4); - } - - template - static inline hash_t endian_align_sub_ending(hash_t hash_ret, const uint8_t* p, const uint8_t* bEnd) - { - if constexpr (N == 32) - { - while ((p + 4) <= bEnd) - { - hash_ret += readLE<32>(p) * PRIME<32>(3); - hash_ret = rotl<32>(hash_ret, 17) * PRIME<32>(4); - p += 4; - } - - while (p < bEnd) - { - hash_ret += (*p) * PRIME<32>(5); - hash_ret = rotl<32>(hash_ret, 11) * PRIME<32>(1); - p++; - } - - return avalanche<32>(hash_ret); - } - else - { - while (p + 8 <= bEnd) - { - const uint64_t k1 = round<64>(0, readLE<64>(p)); - - hash_ret ^= k1; - hash_ret = rotl<64>(hash_ret, 27) * PRIME<64>(1) + PRIME<64>(4); - p += 8; - } - - if (p + 4 <= bEnd) - { - hash_ret ^= static_cast(readLE<32>(p))* PRIME<64>(1); - hash_ret = rotl<64>(hash_ret, 23) * PRIME<64>(2) + PRIME<64>(3); - p += 4; - } - - while (p < bEnd) - { - hash_ret ^= (*p) * PRIME<64>(5); - hash_ret = rotl<64>(hash_ret, 11) * PRIME<64>(1); - p++; - } - - return avalanche<64>(hash_ret); - } - } - - template - static inline hash_t endian_align(const void* input, size_t len, uint_t seed) - { - static_assert(!(N != 32 && N != 64), "You can only call endian_align in 32 or 64 bit mode."); - - const uint8_t* p = static_cast(input); - const uint8_t* bEnd = p + len; - hash_t hash_ret; - - if (len >= (N / 2)) - { - const uint8_t* const limit = bEnd - (N / 2); - uint_t v1 = seed + PRIME(1) + PRIME(2); - uint_t v2 = seed + PRIME(2); - uint_t v3 = seed + 0; - uint_t v4 = seed - PRIME(1); - - do - { - v1 = round(v1, readLE(p)); - p += (N / 8); - v2 = round(v2, readLE(p)); - p += (N / 8); - v3 = round(v3, readLE(p)); - p += (N / 8); - v4 = round(v4, readLE(p)); - p += (N / 8); - } - while (p <= limit); - - hash_ret = rotl(v1, 1) + rotl(v2, 7) + rotl(v3, 12) + rotl(v4, 18); - - if constexpr (N == 64) - { - endian_align_sub_mergeround(hash_ret, v1, v2, v3, v4); - } - } - else - { - hash_ret = seed + PRIME(5); - } - - hash_ret += static_cast>(len); - - return endian_align_sub_ending(hash_ret, p, bEnd); - } - } - - - /* ************************************* - * Algorithm Implementation - xxhash3 - ***************************************/ - - namespace detail3 - { - using namespace vec_ops; - using namespace detail; - using namespace mem_ops; - using namespace bit_ops; - - - /* ************************************* - * Enums - ***************************************/ - - enum class vec_mode : uint8_t { scalar = 0, sse2 = 1, avx2 = 2, avx512 = 3 }; - - - /* ************************************* - * Constants - ***************************************/ - - constexpr uint64_t secret_default_size = 192; - constexpr uint64_t secret_size_min = 136; - constexpr uint64_t secret_consume_rate = 8; - constexpr uint64_t stripe_len = 64; - constexpr uint64_t acc_nb = 8; - constexpr uint64_t prefetch_distance = 384; - constexpr uint64_t secret_lastacc_start = 7; - constexpr uint64_t secret_mergeaccs_start = 11; - constexpr uint64_t midsize_max = 240; - constexpr uint64_t midsize_startoffset = 3; - constexpr uint64_t midsize_lastoffset = 17; - - constexpr vec_mode vector_mode = static_cast(intrin::vector_mode); - constexpr uint64_t acc_align = intrin::acc_align; - constexpr std::array vector_bit_width { 64, 128, 256, 512 }; - - - /* ************************************* - * Defaults - ***************************************/ - - alignas(64) constexpr uint8_t default_secret[secret_default_size] = { - 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, - 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, - 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, - 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, - 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, - 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, - 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, - 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, - 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, - 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, - 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, - 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, - }; - - constexpr std::array init_acc = { PRIME<32>(3), PRIME<64>(1), PRIME<64>(2), PRIME<64>(3), PRIME<64>(4), PRIME<32>(2), PRIME<64>(5), PRIME<32>(1) }; - - - /* ************************************* - * Functions - ***************************************/ - - XXH_FORCE_INLINE hash_t<64> avalanche(hash_t<64> h64) - { - constexpr uint64_t avalanche_mul_prime = 0x165667919E3779F9ULL; - - h64 ^= h64 >> 37; - h64 *= avalanche_mul_prime; - h64 ^= h64 >> 32; - return h64; - } - - XXH_FORCE_INLINE hash_t<64> rrmxmx(hash_t<64> h64, uint64_t len) - { - h64 ^= rotl<64>(h64, 49) ^ rotl<64>(h64, 24); - h64 *= 0x9FB21C651E98DF25ULL; - h64 ^= (h64 >> 35) + len; - h64 *= 0x9FB21C651E98DF25ULL; - h64 ^= (h64 >> 28); - return h64; - } - - XXH_FORCE_INLINE void combine_16(void* dest, hash128_t h128) - { - writeLE<64>(dest, readLE<64>(dest) ^ h128.low64); - writeLE<64>((uint8_t*)dest + 8, readLE<64>((uint8_t*)dest + 8) ^ h128.high64); - } - - XXH_FORCE_INLINE void accumulate_512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT input, const void* XXH_RESTRICT secret) - { - constexpr uint64_t bits = vector_bit_width[static_cast(vector_mode)]; - - using vec_t = vec_t; - - alignas(sizeof(vec_t)) vec_t* const xacc = static_cast(acc); - const vec_t* const xinput = static_cast(input); - const vec_t* const xsecret = static_cast(secret); - - for (size_t i = 0; i < stripe_len / sizeof(vec_t); i++) - { - vec_t const data_vec = loadu(xinput + i); - vec_t const key_vec = loadu(xsecret + i); - vec_t const data_key = xorv(data_vec, key_vec); - vec_t product = set1(0); - - if constexpr (vector_mode == vec_mode::scalar) - { - product = mul32to64(srli(slli(data_key, 32),32), srli(data_key, 32)); - xacc[i ^ 1] = add(xacc[i ^ 1], data_vec); - xacc[i] = add(xacc[i], product); - } - else - { - vec_t const data_key_lo = shuffle(data_key); - product = mul(data_key, data_key_lo); - - vec_t const data_swap = shuffle(data_vec); - vec_t const sum = add(xacc[i], data_swap); - xacc[i] = add(sum, product); - } - } - } - - XXH_FORCE_INLINE void scramble_acc(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) - { - constexpr uint64_t bits = vector_bit_width[static_cast(vector_mode)];; - - using vec_t = vec_t; - - alignas(sizeof(vec_t)) vec_t* const xacc = (vec_t*)acc; - const vec_t* const xsecret = (const vec_t*)secret; - - for (size_t i = 0; i < stripe_len / sizeof(vec_t); i++) - { - vec_t const acc_vec = xacc[i]; - vec_t const shifted = srli(acc_vec, 47); - vec_t const data_vec = xorv(acc_vec, shifted); - vec_t const key_vec = loadu(xsecret + i); - vec_t const data_key = xorv(data_vec, key_vec); - - if constexpr (vector_mode == vec_mode::scalar) - { - xacc[i] = mul(data_key, set1(PRIME<32>(1))); - } - else - { - vec_t const prime32 = set1(PRIME<32>(1)); - vec_t const data_key_hi = shuffle(data_key); - vec_t const prod_lo = mul(data_key, prime32); - vec_t const prod_hi = mul(data_key_hi, prime32); - - xacc[i] = add(prod_lo, vec_ops::slli(prod_hi, 32)); - } - } - } - - XXH_FORCE_INLINE void accumulate(uint64_t* XXH_RESTRICT acc, const uint8_t* XXH_RESTRICT input, const uint8_t* XXH_RESTRICT secret, size_t nbStripes) - { - for (size_t n = 0; n < nbStripes; n++) - { - const uint8_t* const in = input + n * stripe_len; - - intrin::prefetch(in + prefetch_distance); - accumulate_512(acc, in, secret + n * secret_consume_rate); - } - } - - XXH_FORCE_INLINE void hash_long_internal_loop(uint64_t* XXH_RESTRICT acc, const uint8_t* XXH_RESTRICT input, size_t len, const uint8_t* XXH_RESTRICT secret, size_t secretSize) - { - size_t const nb_rounds = (secretSize - stripe_len) / secret_consume_rate; - size_t const block_len = stripe_len * nb_rounds; - size_t const nb_blocks = (len-1) / block_len; - - for (size_t n = 0; n < nb_blocks; n++) - { - accumulate(acc, input + n * block_len, secret, nb_rounds); - scramble_acc(acc, secret + secretSize - stripe_len); - } - - /* last partial block */ - size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / stripe_len; - - accumulate(acc, input + nb_blocks * block_len, secret, nbStripes); - - /* last stripe */ - const uint8_t* const p = input + len - stripe_len; - - accumulate_512(acc, p, secret + secretSize - stripe_len - secret_lastacc_start); - } - - XXH_FORCE_INLINE uint64_t mix_2_accs(const uint64_t* XXH_RESTRICT acc, const uint8_t* XXH_RESTRICT secret) - { - return mul128fold64(acc[0] ^ readLE<64>(secret), acc[1] ^ readLE<64>(secret + 8)); - } - - XXH_FORCE_INLINE uint64_t merge_accs(const uint64_t* XXH_RESTRICT acc, const uint8_t* XXH_RESTRICT secret, uint64_t start) - { - uint64_t result64 = start; - - result64 += mix_2_accs(acc + 0, secret + 0); - result64 += mix_2_accs(acc + 2, secret + 16); - result64 += mix_2_accs(acc + 4, secret + 32); - result64 += mix_2_accs(acc + 6, secret + 48); - - return avalanche(result64); - } - - XXH_FORCE_INLINE void init_custom_secret(uint8_t* customSecret, uint64_t seed) - { - for (uint64_t i = 0; i < secret_default_size / 16; i++) - { - writeLE<64>(customSecret + i * 16, readLE<64>(default_secret + i * 16) + seed); - writeLE<64>(customSecret + i * 16 + 8, readLE<64>(default_secret + i * 16 + 8) - seed); - } - } - - template - XXH_FORCE_INLINE hash_t len_1to3(const uint8_t* input, size_t len, const uint8_t* secret, uint64_t seed) - { - if constexpr (N == 64) - { - uint8_t const c1 = input[0]; - uint8_t const c2 = input[len >> 1]; - uint8_t const c3 = input[len - 1]; - uint32_t const combined = ((uint32_t)c1 << 16) | (((uint32_t)c2) << 24) | (((uint32_t)c3) << 0) | (((uint32_t)len) << 8); - uint64_t const bitflip = (readLE<32>(secret) ^ readLE<32>(secret + 4)) + seed; - uint64_t const keyed = (uint64_t)combined ^ bitflip; - return detail::avalanche<64>(keyed); - } - else - { - uint8_t const c1 = input[0]; - uint8_t const c2 = input[len >> 1]; - uint8_t const c3 = input[len - 1]; - uint32_t const combinedl = ((uint32_t)c1 << 16) + (((uint32_t)c2) << 24) + (((uint32_t)c3) << 0) + (((uint32_t)len) << 8); - uint32_t const combinedh = rotl<32>(swap<32>(combinedl), 13); - uint64_t const bitflipl = (readLE<32>(secret) ^ readLE<32>(secret + 4)) + seed; - uint64_t const bitfliph = (readLE<32>(secret + 8) ^ readLE<32>(secret + 12)) - seed; - uint64_t const keyed_lo = (uint64_t)combinedl ^ bitflipl; - uint64_t const keyed_hi = (uint64_t)combinedh ^ bitfliph; - hash128_t const h128 = { detail::avalanche<64>(keyed_lo), detail::avalanche<64>(keyed_hi)}; - - return h128; - } - } - - template - XXH_FORCE_INLINE hash_t len_4to8(const uint8_t* input, size_t len, const uint8_t* secret, uint64_t seed) - { - constexpr uint64_t mix_constant = 0x9FB21C651E98DF25ULL; - - seed ^= (uint64_t)swap<32>((uint32_t)seed) << 32; - - if constexpr (N == 64) - { - uint32_t const input1 = readLE<32>(input); - uint32_t const input2 = readLE<32>(input + len - 4); - uint64_t const bitflip = (readLE<64>(secret + 8) ^ readLE<64>(secret + 16)) - seed; - uint64_t const input64 = input2 + ((uint64_t)input1 << 32); - uint64_t keyed = input64 ^ bitflip; - - return rrmxmx(keyed, len); - } - else - { - uint32_t const input_lo = readLE<32>(input); - uint32_t const input_hi = readLE<32>(input + len - 4); - uint64_t const input_64 = input_lo + ((uint64_t)input_hi << 32); - uint64_t const bitflip = (readLE<64>(secret + 16) ^ readLE<64>(secret + 24)) + seed; - uint64_t const keyed = input_64 ^ bitflip; - uint128_t m128 = mul64to128(keyed, PRIME<64>(1) + (len << 2)); - - m128.high64 += (m128.low64 << 1); - m128.low64 ^= (m128.high64 >> 3); - m128.low64 ^= (m128.low64 >> 35); - m128.low64 *= mix_constant; - m128.low64 ^= (m128.low64 >> 28); - m128.high64 = avalanche(m128.high64); - - return m128; - } - } - - template - XXH_FORCE_INLINE hash_t len_9to16(const uint8_t* input, size_t len, const uint8_t* secret, uint64_t seed) - { - if constexpr (N == 64) - { - uint64_t const bitflip1 = (readLE<64>(secret + 24) ^ readLE<64>(secret + 32)) + seed; - uint64_t const bitflip2 = (readLE<64>(secret + 40) ^ readLE<64>(secret + 48)) - seed; - uint64_t const input_lo = readLE<64>(input) ^ bitflip1; - uint64_t const input_hi = readLE<64>(input + len - 8) ^ bitflip2; - uint64_t const acc = len + swap<64>(input_lo) + input_hi + mul128fold64(input_lo, input_hi); - - return avalanche(acc); - } - else - { - uint64_t const bitflipl = (readLE<64>(secret + 32) ^ readLE<64>(secret + 40)) - seed; - uint64_t const bitfliph = (readLE<64>(secret + 48) ^ readLE<64>(secret + 56)) + seed; - uint64_t const input_lo = readLE<64>(input); - uint64_t input_hi = readLE<64>(input + len - 8); - uint128_t m128 = mul64to128(input_lo ^ input_hi ^ bitflipl, PRIME<64>(1)); - - m128.low64 += (uint64_t)(len - 1) << 54; - input_hi ^= bitfliph; - - if constexpr (sizeof(void*) < sizeof(uint64_t)) // 32-bit version - { - m128.high64 += (input_hi & 0xFFFFFFFF00000000) + mul32to64((uint32_t)input_hi, PRIME<32>(2)); - } - else - { - m128.high64 += input_hi + mul32to64((uint32_t)input_hi, PRIME<32>(2) - 1); - } - - m128.low64 ^= swap<64>(m128.high64); - - hash128_t h128 = mul64to128(m128.low64, PRIME<64>(2)); - - h128.high64 += m128.high64 * PRIME<64>(2); - h128.low64 = avalanche(h128.low64); - h128.high64 = avalanche(h128.high64); - - return h128; - } - } - - template - XXH_FORCE_INLINE hash_t len_0to16(const uint8_t* input, size_t len, const uint8_t* secret, uint64_t seed) - { - if (XXH_likely(len > 8)) - { - return len_9to16(input, len, secret, seed); - } - else if (XXH_likely(len >= 4)) - { - return len_4to8(input, len, secret, seed); - } - else if (len) - { - return len_1to3(input, len, secret, seed); - } - else - { - if constexpr (N == 64) - { - return detail::avalanche<64>((seed) ^ (readLE<64>(secret + 56) ^ readLE<64>(secret + 64))); - } - else - { - uint64_t const bitflipl = readLE<64>(secret + 64) ^ readLE<64>(secret + 72); - uint64_t const bitfliph = readLE<64>(secret + 80) ^ readLE<64>(secret + 88); - - return hash128_t(detail::avalanche<64>(( seed) ^ bitflipl), detail::avalanche<64>(( seed) ^ bitfliph)); - } - } - } - - template - XXH_FORCE_INLINE hash_t hash_long_internal(const uint8_t* XXH_RESTRICT input, size_t len, const uint8_t* XXH_RESTRICT secret = default_secret, size_t secretSize = sizeof(default_secret)) - { - alignas(acc_align) std::array acc = init_acc; - - if constexpr (N == 64) - { - hash_long_internal_loop(acc.data(), input, len, secret, secretSize); - - /* converge into final hash */ - return merge_accs(acc.data(), secret + secret_mergeaccs_start, (uint64_t)len * PRIME<64>(1)); - } - else - { - hash_long_internal_loop(acc.data(), input, len, secret, secretSize); - - /* converge into final hash */ - uint64_t const low64 = merge_accs(acc.data(), secret + secret_mergeaccs_start, (uint64_t)len * PRIME<64>(1)); - uint64_t const high64 = merge_accs(acc.data(), secret + secretSize - sizeof(acc) - secret_mergeaccs_start, ~((uint64_t)len * PRIME<64>(2))); - - return hash128_t(low64, high64); - } - } - - XXH_FORCE_INLINE uint64_t mix_16b(const uint8_t* XXH_RESTRICT input, const uint8_t* XXH_RESTRICT secret, uint64_t seed) - { - uint64_t const input_lo = readLE<64>(input); - uint64_t const input_hi = readLE<64>(input + 8); - - return mul128fold64(input_lo ^ (readLE<64>(secret) + seed), input_hi ^ (readLE<64>(secret + 8) - seed)); - } - - XXH_FORCE_INLINE uint128_t mix_32b(uint128_t acc, const uint8_t* input1, const uint8_t* input2, const uint8_t* secret, uint64_t seed) - { - acc.low64 += mix_16b(input1, secret + 0, seed); - acc.low64 ^= readLE<64>(input2) + readLE<64>(input2 + 8); - acc.high64 += mix_16b(input2, secret + 16, seed); - acc.high64 ^= readLE<64>(input1) + readLE<64>(input1 + 8); - - return acc; - } - - template - XXH_FORCE_INLINE hash_t len_17to128(const uint8_t* XXH_RESTRICT input, size_t len, const uint8_t* XXH_RESTRICT secret, uint64_t seed) - { - if constexpr (N == 64) - { - hash64_t acc = len * PRIME<64>(1); - - if (len > 32) - { - if (len > 64) - { - if (len > 96) - { - acc += mix_16b(input + 48, secret + 96, seed); - acc += mix_16b(input + len - 64, secret + 112, seed); - } - - acc += mix_16b(input + 32, secret + 64, seed); - acc += mix_16b(input + len - 48, secret + 80, seed); - } - - acc += mix_16b(input + 16, secret + 32, seed); - acc += mix_16b(input + len - 32, secret + 48, seed); - } - - acc += mix_16b(input + 0, secret + 0, seed); - acc += mix_16b(input + len - 16, secret + 16, seed); - - return avalanche(acc); - } - else - { - hash128_t acc = { len * PRIME<64>(1), 0 }; - - if (len > 32) - { - if (len > 64) - { - if (len > 96) - { - acc = mix_32b(acc, input + 48, input + len - 64, secret + 96, seed); - } - - acc = mix_32b(acc, input + 32, input + len - 48, secret + 64, seed); - } - - acc = mix_32b(acc, input + 16, input + len - 32, secret + 32, seed); - } - - acc = mix_32b(acc, input, input + len - 16, secret, seed); - - uint64_t const low64 = acc.low64 + acc.high64; - uint64_t const high64 = (acc.low64 * PRIME<64>(1)) + (acc.high64 * PRIME<64>(4)) + ((len - seed) * PRIME<64>(2)); - - return { avalanche(low64), (uint64_t)0 - avalanche(high64) }; - } - } - - template - XXH_NO_INLINE hash_t len_129to240(const uint8_t* XXH_RESTRICT input, size_t len, const uint8_t* XXH_RESTRICT secret, uint64_t seed) - { - if constexpr (N == 64) - { - uint64_t acc = len * PRIME<64>(1); - size_t const nbRounds = len / 16; - - for (size_t i = 0; i < 8; i++) - { - acc += mix_16b(input + (i * 16), secret + (i * 16), seed); - } - - acc = avalanche(acc); - - for (size_t i = 8; i < nbRounds; i++) - { - acc += mix_16b(input + (i * 16), secret + ((i - 8) * 16) + midsize_startoffset, seed); - } - - /* last bytes */ - acc += mix_16b(input + len - 16, secret + secret_size_min - midsize_lastoffset, seed); - - return avalanche(acc); - } - else - { - hash128_t acc; - uint64_t const nbRounds = len / 32; - - acc.low64 = len * PRIME<64>(1); - acc.high64 = 0; - - for (size_t i = 0; i < 4; i++) - { - acc = mix_32b(acc, input + (i * 32), input + (i * 32) + 16, secret + (i * 32), seed); - } - - acc.low64 = avalanche(acc.low64); - acc.high64 = avalanche(acc.high64); - - for (size_t i = 4; i < nbRounds; i++) - { - acc = mix_32b(acc, input + (i * 32), input + (i * 32) + 16, secret + midsize_startoffset + ((i - 4) * 32), seed); - } - - /* last bytes */ - acc = mix_32b(acc, input + len - 16, input + len - 32, secret + secret_size_min - midsize_lastoffset - 16, 0ULL - seed); - - uint64_t const low64 = acc.low64 + acc.high64; - uint64_t const high64 = (acc.low64 * PRIME<64>(1)) + (acc.high64 * PRIME<64>(4)) + ((len - seed) * PRIME<64>(2)); - - return { avalanche(low64), (uint64_t)0 - avalanche(high64) }; - } - - } - - template - XXH_NO_INLINE hash_t xxhash3_impl(const void* XXH_RESTRICT input, size_t len, hash64_t seed, const void* XXH_RESTRICT secret = default_secret, size_t secretSize = secret_default_size) - { - - alignas(64) uint8_t custom_secret[secret_default_size]; - - const void* short_secret = secret; - - if (seed != 0) - { - init_custom_secret(custom_secret, seed); - short_secret = default_secret; - } - - if (len <= 16) - { - return len_0to16(static_cast(input), len, static_cast(short_secret), seed); - } - else if (len <= 128) - { - return len_17to128(static_cast(input), len, static_cast(short_secret), seed); - } - else if (len <= midsize_max) - { - return len_129to240(static_cast(input), len, static_cast(short_secret), seed); - } - else - { - return hash_long_internal(static_cast(input), len, static_cast(((seed == 0) ? secret : ((secret == default_secret) ? custom_secret : secret))), ((seed == 0) ? secretSize : ((secret == default_secret) ? secret_default_size : secretSize))); - } - } - - XXH_NO_INLINE void generate_secret(void* secret_buffer, size_t secret_size, const void* custom_seed, size_t seed_size) - { - if (seed_size == 0) - { - custom_seed = default_secret; - seed_size = secret_default_size; - } - - size_t pos = 0; - while (pos < secret_size) - { - size_t const copy_len = std::min(secret_size - pos, seed_size); - memcpy((uint8_t*)secret_buffer + pos, custom_seed, copy_len); - pos += copy_len; - } - - size_t const nbseg16 = secret_size / 16; - canonical128_t scrambled(xxhash3_impl<128>(custom_seed, seed_size, 0)); - for (size_t n = 0; n < nbseg16; n++) - { - hash128_t const h128 = xxhash3_impl<128>(&scrambled, sizeof(scrambled), n); - combine_16((uint8_t*)secret_buffer + n * 16, h128); - } - - combine_16((uint8_t*)secret_buffer + secret_size - 16, scrambled.get_hash()); - } - } - - - /* ************************************* - * Public Access Point - xxhash - ***************************************/ - - template - inline hash_t xxhash(const void* input, size_t len, uint_t seed = 0) - { - static_assert(!(bit_mode != 32 && bit_mode != 64), "xxhash can only be used in 32 and 64 bit modes."); - return detail::endian_align(input, len, seed); - } - - template - inline hash_t xxhash(const std::basic_string& input, uint_t seed = 0) - { - static_assert(!(bit_mode != 32 && bit_mode != 64), "xxhash can only be used in 32 and 64 bit modes."); - return detail::endian_align(static_cast(input.data()), input.length() * sizeof(T), seed); - } - - template - inline hash_t xxhash(ContiguousIterator begin, ContiguousIterator end, uint_t seed = 0) - { - static_assert(!(bit_mode != 32 && bit_mode != 64), "xxhash can only be used in 32 and 64 bit modes."); - using T = typename std::decay_t; - return detail::endian_align(static_cast(&*begin), (end - begin) * sizeof(T), seed); - } - - template - inline hash_t xxhash(const std::vector& input, uint_t seed = 0) - { - static_assert(!(bit_mode != 32 && bit_mode != 64), "xxhash can only be used in 32 and 64 bit modes."); - return detail::endian_align(static_cast(input.data()), input.size() * sizeof(T), seed); - } - - template - inline hash_t xxhash(const std::array& input, uint_t seed = 0) - { - static_assert(!(bit_mode != 32 && bit_mode != 64), "xxhash can only be used in 32 and 64 bit modes."); - return detail::endian_align(static_cast(input.data()), AN * sizeof(T), seed); - } - - template - inline hash_t xxhash(const std::initializer_list& input, uint_t seed = 0) - { - static_assert(!(bit_mode != 32 && bit_mode != 64), "xxhash can only be used in 32 and 64 bit modes."); - return detail::endian_align(static_cast(input.begin()), input.size() * sizeof(T), seed); - } - - - /* ************************************* - * Public Access Point - xxhash3 - ***************************************/ - - template - inline hash_t xxhash3(const void* input, size_t len, uint64_t seed = 0) - { - static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes."); - return detail3::xxhash3_impl(input, len, seed); - } - - template - inline hash_t xxhash3(const void* input, size_t len, const void* secret, size_t secretSize, uint64_t seed = 0) - { - static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes."); - return detail3::xxhash3_impl(input, len, seed, secret, secretSize); - } - - template - inline hash_t xxhash3(const std::basic_string& input, uint64_t seed = 0) - { - static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes."); - return detail3::xxhash3_impl(static_cast(input.data()), input.length() * sizeof(T), seed); - } - - template - inline hash_t xxhash3(const std::basic_string& input, const void* secret, size_t secretSize, uint64_t seed = 0) - { - static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes."); - return detail3::xxhash3_impl(static_cast(input.data()), input.length() * sizeof(T), seed, secret, secretSize); - } - - template - inline hash_t xxhash3(ContiguousIterator begin, ContiguousIterator end, uint64_t seed = 0) - { - static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes."); - using T = typename std::decay_t; - return detail3::xxhash3_impl(static_cast(&*begin), (end - begin) * sizeof(T), seed); - } - - template - inline hash_t xxhash3(ContiguousIterator begin, ContiguousIterator end, const void* secret, size_t secretSize, uint64_t seed = 0) - { - static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes."); - using T = typename std::decay_t; - return detail3::xxhash3_impl(static_cast(&*begin), (end - begin) * sizeof(T), seed, secret, secretSize); - } - - template - inline hash_t xxhash3(const std::vector& input, uint64_t seed = 0) - { - static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes."); - return detail3::xxhash3_impl(static_cast(input.data()), input.size() * sizeof(T), seed); - } - - template - inline hash_t xxhash3(const std::vector& input, const void* secret, size_t secretSize, uint64_t seed = 0) - { - static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes."); - return detail3::xxhash3_impl(static_cast(input.data()), input.size() * sizeof(T), seed, secret, secretSize); - } - - template - inline hash_t xxhash3(const std::array& input, uint64_t seed = 0) - { - static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes."); - return detail3::xxhash3_impl(static_cast(input.data()), AN * sizeof(T), seed); - } - - template - inline hash_t xxhash3(const std::array& input, const void* secret, size_t secretSize, uint64_t seed = 0) - { - static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes."); - return detail3::xxhash3_impl(static_cast(input.data()), AN * sizeof(T), seed, secret, secretSize); - } - - template - inline hash_t xxhash3(const std::initializer_list& input, uint64_t seed = 0) - { - static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes."); - return detail3::xxhash3_impl(static_cast(input.begin()), input.size() * sizeof(T), seed); - } - - template - inline hash_t xxhash3(const std::initializer_list& input, const void* secret, size_t secretSize, uint64_t seed = 0) - { - static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 can only be used in 64 and 128 bit modes."); - return detail3::xxhash3_impl(static_cast(input.begin()), input.size() * sizeof(T), seed, secret, secretSize); - } - - - /* ************************************* - * Secret Generation Functions - ***************************************/ - - inline void generate_secret(void* secret_buffer, size_t secret_size, const void* custom_seed = detail3::default_secret, size_t seed_length = 0) - { - detail3::generate_secret(secret_buffer, secret_size, custom_seed, seed_length); - } - - template - inline void generate_secret(void* secret_buffer, size_t secret_size, const std::array& custom_seed) - { - detail3::generate_secret(secret_buffer, secret_size, static_cast(custom_seed.data()), AN * sizeof(T)); - } - - template - inline void generate_secret(void* secret_buffer, size_t secret_size, const std::initializer_list& custom_seed) - { - detail3::generate_secret(secret_buffer, secret_size, static_cast(custom_seed.begin()), custom_seed.size() * sizeof(T)); - } - - template - inline void generate_secret(void* secret_buffer, size_t secret_size, const std::vector& custom_seed) - { - detail3::generate_secret(secret_buffer, secret_size, static_cast(custom_seed.data()), custom_seed.size() * sizeof(T)); - } - - template - inline void generate_secret(void* secret_buffer, size_t secret_size, const std::basic_string& custom_seed) - { - detail3::generate_secret(secret_buffer, secret_size, static_cast(custom_seed.data()), custom_seed.length() * sizeof(T)); - } - - template - inline void generate_secret(void* secret_buffer, size_t secret_size, ContiguousIterator begin, ContiguousIterator end) - { - using T = typename std::decay_t; - detail3::generate_secret(secret_buffer, secret_size, static_cast(&*begin), (end - begin) * sizeof(T)); - } - - inline void generate_secret_from_seed(void* secret_buffer, uint64_t seed = 0) - { - alignas(64) uint8_t custom_secret[detail3::secret_default_size]; - detail3::init_custom_secret(custom_secret, seed); - memcpy(secret_buffer, custom_secret, detail3::secret_default_size); - } - - - /* ************************************* - * Hash streaming - xxhash - ***************************************/ - - template - class hash_state_t - { - uint64_t total_len = 0; - uint_t v1 = 0, v2 = 0, v3 = 0, v4 = 0; - std::array, 4> mem = {0, 0, 0, 0}; - uint32_t memsize = 0; - - inline void update_impl(const void* input, size_t length) - { - const uint8_t* p = reinterpret_cast(input); - const uint8_t* const bEnd = p + length; - - total_len += length; - - if (memsize + length < (bit_mode / 2)) - { /* fill in tmp buffer */ - memcpy(reinterpret_cast(mem.data()) + memsize, input, length); - memsize += static_cast(length); - return; - } - - if (memsize > 0) - { /* some data left from previous update */ - memcpy(reinterpret_cast(mem.data()) + memsize, input, (bit_mode / 2) - memsize); - - const uint_t* ptr = mem.data(); - - v1 = detail::round(v1, mem_ops::readLE(ptr)); - ptr++; - v2 = detail::round(v2, mem_ops::readLE(ptr)); - ptr++; - v3 = detail::round(v3, mem_ops::readLE(ptr)); - ptr++; - v4 = detail::round(v4, mem_ops::readLE(ptr)); - - p += (bit_mode / 2) - memsize; - memsize = 0; - } - - while (p + (bit_mode / 2) <= bEnd) - { - v1 = detail::round(v1, mem_ops::readLE(p)); - p += (bit_mode / 8); - v2 = detail::round(v2, mem_ops::readLE(p)); - p += (bit_mode / 8); - v3 = detail::round(v3, mem_ops::readLE(p)); - p += (bit_mode / 8); - v4 = detail::round(v4, mem_ops::readLE(p)); - p += (bit_mode / 8); - } - - if (p < bEnd) - { - memcpy(mem.data(), p, static_cast(bEnd - p)); - memsize = static_cast(bEnd - p); - } - } - - inline hash_t digest_impl() const - { - const uint8_t* p = reinterpret_cast(mem.data()); - const uint8_t* const bEnd = reinterpret_cast(mem.data()) + memsize; - hash_t hash_ret; - - if (total_len >= (bit_mode / 2)) - { - hash_ret = bit_ops::rotl(v1, 1) + bit_ops::rotl(v2, 7) + bit_ops::rotl(v3, 12) + bit_ops::rotl(v4, 18); - - if constexpr (bit_mode == 64) - { - detail::endian_align_sub_mergeround(hash_ret, v1, v2, v3, v4); - } - } - else - { - hash_ret = v3 + detail::PRIME(5); - } - - hash_ret += static_cast>(total_len); - - return detail::endian_align_sub_ending(hash_ret, p, bEnd); - } - - public: - - hash_state_t(uint_t seed = 0) - { - static_assert(!(bit_mode != 32 && bit_mode != 64), "xxhash streaming can only be used in 32 and 64 bit modes."); - v1 = seed + detail::PRIME(1) + detail::PRIME(2); - v2 = seed + detail::PRIME(2); - v3 = seed + 0; - v4 = seed - detail::PRIME(1); - }; - - void reset(uint_t seed = 0) - { - memset(this, 0, sizeof(hash_state_t)); - v1 = seed + detail::PRIME(1) + detail::PRIME(2); - v2 = seed + detail::PRIME(2); - v3 = seed + 0; - v4 = seed - detail::PRIME(1); - } - - void update(const void* input, size_t length) - { - return update_impl(input, length); - } - - template - void update(const std::basic_string& input) - { - return update_impl(static_cast(input.data()), input.length() * sizeof(T)); - } - - template - void update(ContiguousIterator begin, ContiguousIterator end) - { - using T = typename std::decay_t; - return update_impl(static_cast(&*begin), (end - begin) * sizeof(T)); - } - - template - void update(const std::vector& input) - { - return update_impl(static_cast(input.data()), input.size() * sizeof(T)); - } - - template - void update(const std::array& input) - { - return update_impl(static_cast(input.data()), AN * sizeof(T)); - } - - template - void update(const std::initializer_list& input) - { - return update_impl(static_cast(input.begin()), input.size() * sizeof(T)); - } - - hash_t digest() const - { - return digest_impl(); - } - }; - - using hash_state32_t = hash_state_t<32>; - using hash_state64_t = hash_state_t<64>; - - - /* ************************************* - * Hash streaming - xxhash3 - ***************************************/ - - template - class alignas(64) hash3_state_t - { - constexpr static int internal_buffer_size = 256; - constexpr static int internal_buffer_stripes = (internal_buffer_size / detail3::stripe_len); - - alignas(64) uint64_t acc[8]; - alignas(64) uint8_t customSecret[detail3::secret_default_size]; /* used to store a custom secret generated from the seed. Makes state larger. Design might change */ - alignas(64) uint8_t buffer[internal_buffer_size]; - uint32_t bufferedSize = 0; - uint32_t nbStripesPerBlock = 0; - uint32_t nbStripesSoFar = 0; - uint32_t secretLimit = 0; - uint32_t reserved32 = 0; - uint32_t reserved32_2 = 0; - uint64_t totalLen = 0; - uint64_t seed = 0; - bool useSeed = false; - uint64_t reserved64 = 0; - const uint8_t* secret = nullptr; /* note : there is some padding after, due to alignment on 64 bytes */ - - - void consume_stripes(uint64_t* acc, uint32_t& nbStripesSoFar, size_t totalStripes, const uint8_t* input) - { - if (nbStripesPerBlock - nbStripesSoFar <= totalStripes) /* need a scrambling operation */ - { - size_t const nbStripes = nbStripesPerBlock - nbStripesSoFar; - - detail3::accumulate(acc, input, secret + (nbStripesSoFar * detail3::secret_consume_rate), nbStripes); - detail3::scramble_acc(acc, secret + secretLimit); - detail3::accumulate(acc, input + nbStripes * detail3::stripe_len, secret, totalStripes - nbStripes); - nbStripesSoFar = (uint32_t)(totalStripes - nbStripes); - } - else - { - detail3::accumulate(acc, input, secret + (nbStripesSoFar * detail3::secret_consume_rate), totalStripes); - nbStripesSoFar += (uint32_t)totalStripes; - } - } - - void update_impl(const void* input_, size_t len) - { - const uint8_t* input = static_cast(input_); - const uint8_t* const bEnd = input + len; - - totalLen += len; - - if (bufferedSize + len <= internal_buffer_size) - { /* fill in tmp buffer */ - memcpy(buffer + bufferedSize, input, len); - bufferedSize += (uint32_t)len; - return; - } - /* input now > XXH3_INTERNALBUFFER_SIZE */ - - if (bufferedSize > 0) - { /* some input within internal buffer: fill then consume it */ - size_t const loadSize = internal_buffer_size - bufferedSize; - - memcpy(buffer + bufferedSize, input, loadSize); - input += loadSize; - consume_stripes(acc, nbStripesSoFar, internal_buffer_stripes, buffer); - bufferedSize = 0; - } - - /* consume input by full buffer quantities */ - if (input + internal_buffer_size <= bEnd) - { - const uint8_t* const limit = bEnd - internal_buffer_size; - - do - { - consume_stripes(acc, nbStripesSoFar, internal_buffer_stripes, input); - input += internal_buffer_size; - } - while (input < limit); - - memcpy(buffer + sizeof(buffer) - detail3::stripe_len, input - detail3::stripe_len, detail3::stripe_len); - } - - if (input < bEnd) - { /* some remaining input input : buffer it */ - memcpy(buffer, input, (size_t)(bEnd - input)); - bufferedSize = (uint32_t)(bEnd - input); - } - } - - void digest_long(uint64_t* acc_) - { - memcpy(acc_, acc, sizeof(acc)); /* digest locally, state remains unaltered, and can continue ingesting more input afterwards */ - - if (bufferedSize >= detail3::stripe_len) - { - size_t const totalNbStripes = (bufferedSize - 1) / detail3::stripe_len; - uint32_t nbStripesSoFar = this->nbStripesSoFar; - - consume_stripes(acc_, nbStripesSoFar, totalNbStripes, buffer); - - /* one last partial stripe */ - detail3::accumulate_512(acc_, buffer + bufferedSize - detail3::stripe_len, secret + secretLimit - detail3::secret_lastacc_start); - } - else - { /* bufferedSize < STRIPE_LEN */ - /* one last stripe */ - uint8_t lastStripe[detail3::stripe_len]; - size_t const catchupSize = detail3::stripe_len - bufferedSize; - memcpy(lastStripe, buffer + sizeof(buffer) - catchupSize, catchupSize); - memcpy(lastStripe + catchupSize, buffer, bufferedSize); - detail3::accumulate_512(acc_, lastStripe, secret + secretLimit - detail3::secret_lastacc_start); - } - } - - void reset_internal(uint64_t seed_reset, const void* secret_reset, size_t secret_size) - { - memset(this, 0, sizeof(*this)); - memcpy(acc, detail3::init_acc.data(), sizeof(detail3::init_acc)); - seed = seed_reset; - useSeed = (seed != 0); - secret = (const uint8_t*)secret_reset; - secretLimit = (uint32_t)(secret_size - detail3::stripe_len); - nbStripesPerBlock = secretLimit / detail3::secret_consume_rate; - } - - public: - - hash3_state_t operator=(hash3_state_t& other) - { - memcpy(this, &other, sizeof(hash3_state_t)); - } - - hash3_state_t(uint64_t seed = 0) - { - static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 streaming can only be used in 64 and 128 bit modes."); - reset(seed); - } - - hash3_state_t(const void* secret, size_t secretSize, uint64_t seed = 0) - { - static_assert(!(bit_mode != 128 && bit_mode != 64), "xxhash3 streaming can only be used in 64 and 128 bit modes."); - reset(secret, secretSize, seed); - } - - void reset(uint64_t seed = 0) - { - reset_internal(seed, detail3::default_secret, detail3::secret_default_size); - detail3::init_custom_secret(customSecret, seed); - secret = customSecret; - } - - void reset(const void* secret, size_t secretSize, uint64_t seed = 0) - { - reset_internal(seed, secret, secretSize); - useSeed = true; - } - - void update(const void* input, size_t len) - { - return update_impl(static_cast(input), len); - } - - template - void update(const std::basic_string& input) - { - return update_impl(static_cast(input.data()), input.length() * sizeof(T)); - } - - template - void update(ContiguousIterator begin, ContiguousIterator end) - { - using T = typename std::decay_t; - return update_impl(static_cast(&*begin), (end - begin) * sizeof(T)); - } - - template - void update(const std::vector& input) - { - return update_impl(static_cast(input.data()), input.size() * sizeof(T)); - } - - template - void update(const std::array& input) - { - return update_impl(static_cast(input.data()), AN * sizeof(T)); - } - - template - void update(const std::initializer_list& input) - { - return update_impl(static_cast(input.begin()), input.size() * sizeof(T)); - } - - hash_t digest() - { - if (totalLen > detail3::midsize_max) - { - alignas(128) hash64_t acc[detail3::acc_nb]; - - digest_long(acc); - - if constexpr (bit_mode == 64) - { - return detail3::merge_accs(acc, secret + detail3::secret_mergeaccs_start, (uint64_t)totalLen * detail::PRIME<64>(1)); - } - else - { - uint64_t const low64 = detail3::merge_accs(acc, secret + detail3::secret_mergeaccs_start, (uint64_t)totalLen * detail::PRIME<64>(1)); - uint64_t const high64 = detail3::merge_accs(acc, secret + secretLimit + detail3::stripe_len - sizeof(acc) - detail3::secret_mergeaccs_start, ~((uint64_t)totalLen * detail::PRIME<64>(2))); - - return { low64, high64 }; - } - } - else - { - return detail3::xxhash3_impl(buffer, totalLen, seed, secret, secretLimit + detail3::stripe_len); - } - } - }; - - using hash3_state64_t = hash3_state_t<64>; - using hash3_state128_t = hash3_state_t<128>; -} diff --git a/client/rsh/src/Makevars b/client/rsh/src/Makevars index bb091ecf..8032cac5 100644 --- a/client/rsh/src/Makevars +++ b/client/rsh/src/Makevars @@ -9,15 +9,17 @@ PB_CPPFLAGS != pkg-config --cflags protobuf PB_LIBS != pkg-config --libs protobuf GRPC_CPPFLAGS != pkg-config --cflags grpc++ GRPC_LIBS != pkg-config --libs grpc++ +XXHASH_CFLAGS != pkg-config --cflags libxxhash +XXHASH_LIBS != pkg-config --libs libxxhash # Both C/C++ flags -PKG_CPPFLAGS := -DR_NO_REMAP -DSTRICT_R_HEADERS -DUSE_RINTERNALS $(DEBUGFLAGS) +PKG_CPPFLAGS := -DR_NO_REMAP -DSTRICT_R_HEADERS -DUSE_RINTERNALS $(DEBUGFLAGS) # C++ flags -PKG_CXXFLAGS := $(LLVM_CPPFLAGS) $(PB_CPPFLAGS) $(GRPC_CPPFLAGS) -fexceptions -I../inst +PKG_CXXFLAGS := $(LLVM_CPPFLAGS) $(PB_CPPFLAGS) $(GRPC_CPPFLAGS) $(XXHASH_CFLAGS) -fexceptions # C flags PKG_CFLAGS := # Linker flags -PKG_LIBS := $(LLVM_LIBS) $(PB_LIBS) $(GRPC_LIBS) -lzmq $(DEBUGFLAGS) +PKG_LIBS := $(LLVM_LIBS) $(PB_LIBS) $(GRPC_LIBS) $(XXHASH_LIBS) $(DEBUGFLAGS) # Allow sources in subdirectories diff --git a/client/rsh/src/client.cpp b/client/rsh/src/client.cpp index 6225079e..24ad2088 100644 --- a/client/rsh/src/client.cpp +++ b/client/rsh/src/client.cpp @@ -1,6 +1,6 @@ #include "client.hpp" -#include "xxhash.hpp" #include "serialize.hpp" +#include "xxhash.h" #include #include @@ -10,45 +10,47 @@ namespace rsh { +Client::Client(std::shared_ptr channel, + std::vector installed_packages) + : stub_(protocol::CompileService::NewStub(channel)) { + using namespace protocol; + InitRequest request; + // Rsh version + request.mutable_rsh_version()->set_major(0); + request.mutable_rsh_version()->set_minor(1); + request.mutable_rsh_version()->set_patch(0); + + // R version (see Rversion.h) + request.mutable_r_version()->set_major(R_VERSION / 65536); + request.mutable_r_version()->set_minor(R_VERSION % 65536 / 256); + request.mutable_r_version()->set_patch(R_VERSION % 256); + + SEXP r_platform = Rf_findVar(Rf_install("R.version"), R_BaseEnv); + // we could also extract the R version from there + // the platform is R.version$platform and is the st element of the list + request.set_platform( + CHAR(STRING_ELT(VECTOR_ELT(r_platform, 0), 0))); // or R_PLATFORM? + + // TODO: compute hashes of package contents (maybe rdb/rdx and so on) + // instead of just writing package names here. + for (auto const &package : installed_packages) { + request.add_package_hash(package); + } -Client::Client(std::shared_ptr channel, std::vector installed_packages) : stub_(protocol::CompileService::NewStub(channel)) { - using namespace protocol; - InitRequest request; - // Rsh version - request.mutable_rsh_version()->set_major(0); - request.mutable_rsh_version()->set_minor(1); - request.mutable_rsh_version()->set_patch(0); - - // R version (see Rversion.h) - request.mutable_r_version()->set_major(R_VERSION / 65536); - request.mutable_r_version()->set_minor(R_VERSION % 65536 / 256); - request.mutable_r_version()->set_patch(R_VERSION % 256); - - SEXP r_platform = Rf_findVar(Rf_install("R.version"), R_BaseEnv); - // we could also extract the R version from there - // the platform is R.version$platform and is the st element of the list - request.set_platform(CHAR(STRING_ELT(VECTOR_ELT(r_platform, 0), 0))); // or R_PLATFORM? - - // TODO: compute hashes of package contents (maybe rdb/rdx and so on) - // instead of just writing package names here. - for(auto const& package : installed_packages) { - request.add_package_hash(package); - } - - - InitResponse response; - grpc::ClientContext context; - grpc::Status status = stub_->Init(&context, request, &response); - if(!status.ok()) { - Rf_error("Failed to connect to the server: %d %s\n", status.error_code(), status.error_message().c_str()); - } - else { - Rprintf("Connected to the server\n"); - } + InitResponse response; + grpc::ClientContext context; + grpc::Status status = stub_->Init(&context, request, &response); + if (!status.ok()) { + Rf_error("Failed to connect to the server: %d %s\n", status.error_code(), + status.error_message().c_str()); + } else { + Rprintf("Connected to the server\n"); } +} -std::variant Client::remote_compile(std::vector const &rds_closure, - CompilerOptions const& options) { +std::variant +Client::remote_compile(std::vector const &rds_closure, + CompilerOptions const &options) { using namespace protocol; CompileRequest request; @@ -58,25 +60,27 @@ std::variant Client::remote_compile(std: request.set_cc_opt(options.cc_opt); request.mutable_function()->set_body(rds_closure.data(), rds_closure.size()); - // We replace the body of a function with its compiled version so it would not make - //sense to compute its hash again, except if its body has changed. - uint64_t hash = xxh::xxhash3<64>(rds_closure.data(), rds_closure.size()); + // We replace the body of a function with its compiled version so it would not + // make + // sense to compute its hash again, except if its body has changed. + uint64_t hash = XXH3_64bits(rds_closure.data(), rds_closure.size()); request.mutable_function()->set_hash(hash); total_request_bytes += request.ByteSizeLong(); - Rprintf("Sending request, with serialized size %ld\n", request.GetCachedSize()); + Rprintf("Sending request, with serialized size %d\n", + request.GetCachedSize()); grpc::ClientContext context; CompileResponse response; grpc::Status status = stub_->Compile(&context, request, &response); total_response_bytes += response.ByteSizeLong(); - if(!status.ok()) { + if (!status.ok()) { std::cerr << status.error_code() << ": " << status.error_message() << std::endl; return status.error_message(); - } - else { - Rprintf("Received response, with serialized size %ld\n", response.GetCachedSize()); + } else { + Rprintf("Received response, with serialized size %d\n", + response.GetCachedSize()); return response; } } @@ -88,45 +92,46 @@ SEXP Client::make_client(SEXP address, SEXP port, SEXP installed_packages) { address_str += ":" + std::to_string(p); std::vector packages; - for(int i = 0; i < LENGTH(installed_packages); ++i) { + for (int i = 0; i < LENGTH(installed_packages); ++i) { packages.push_back(CHAR(STRING_ELT(installed_packages, i))); } - auto channel = grpc::CreateChannel(address_str, grpc::InsecureChannelCredentials()); + auto channel = + grpc::CreateChannel(address_str, grpc::InsecureChannelCredentials()); auto client = new Client(channel, packages); - - SEXP ptr = PROTECT(R_MakeExternalPtr(client, RSH_CLIENT_PTR, R_NilValue)); + SEXP ptr = PROTECT(R_MakeExternalPtr(client, RSH_CLIENT_PTR, R_NilValue)); // Removed because it was causing a segfault (memory not mapped) - //R_RegisterCFinalizerEx(ptr, &Client::remove_client, FALSE);// TRUE because we want to shutdown the client when R quits + // R_RegisterCFinalizerEx(ptr, &Client::remove_client, FALSE);// TRUE because + // we want to shutdown the client when R quits UNPROTECT(1); return ptr; } void Client::remove_client(SEXP ptr) { - if(ptr == nullptr) { + if (ptr == nullptr) { Rf_warning("Client already removed"); } - auto client = static_cast(R_ExternalPtrAddr(ptr)); + auto client = static_cast(R_ExternalPtrAddr(ptr)); delete client; } -Client* Client::get_client() { - if(Client::CLIENT_INSTANCE != nullptr) { - return static_cast(R_ExternalPtrAddr(CLIENT_INSTANCE)); - } - else { +Client *Client::get_client() { + if (Client::CLIENT_INSTANCE != nullptr) { + return static_cast(R_ExternalPtrAddr(CLIENT_INSTANCE)); + } else { Rf_error("Client not initialized"); } } SEXP init_client(SEXP address, SEXP port, SEXP installed_packages) { - if(Client::CLIENT_INSTANCE != nullptr) { + if (Client::CLIENT_INSTANCE != nullptr) { Rf_warning("Client already initialized, replacing it"); } - Client::CLIENT_INSTANCE = Client::make_client(address, port, installed_packages); + Client::CLIENT_INSTANCE = + Client::make_client(address, port, installed_packages); return Client::CLIENT_INSTANCE; } @@ -137,7 +142,7 @@ SEXP get_total_size() { SEXP out = PROTECT(Rf_allocVector(INTSXP, 2)); INTEGER(out)[0] = req; INTEGER(out)[1] = res; - // Add names + // Add names SEXP names = PROTECT(Rf_allocVector(STRSXP, 2)); SET_STRING_ELT(names, 0, Rf_mkChar("request")); SET_STRING_ELT(names, 1, Rf_mkChar("response")); diff --git a/client/rsh/src/client.hpp b/client/rsh/src/client.hpp index 026a3eff..16ffe1b5 100644 --- a/client/rsh/src/client.hpp +++ b/client/rsh/src/client.hpp @@ -1,20 +1,20 @@ #pragma once +#include "compiler.hpp" #include "messages.pb.h" -#include "routes.pb.h" #include "routes.grpc.pb.h" +#include "routes.pb.h" #include "rsh.hpp" -#include "compiler.hpp" #include #include #include #include -#include -#include -#include -#include #include #include +#include +#include +#include +#include namespace rsh { @@ -30,20 +30,22 @@ class Client { // For it to be able to access the client instance friend SEXP init_client(SEXP address, SEXP port, SEXP installed_packages); + public: - Client(std::shared_ptr channel, std::vector installed_packages); + Client(std::shared_ptr channel, + std::vector installed_packages); - std::variant remote_compile( - std::vector const &rds_closure, - CompilerOptions const &opts); + std::variant + remote_compile(std::vector const &rds_closure, + CompilerOptions const &opts); // Total size of requests and responses since the start of the client - std::pair get_total_size() const { + std::pair get_total_size() const { return {total_request_bytes, total_response_bytes}; - } + } static SEXP make_client(SEXP address, SEXP port, SEXP installed_packages); - static Client* get_client(); + static Client *get_client(); }; SEXP get_total_size(); diff --git a/client/rsh/src/compiler.cpp b/client/rsh/src/compiler.cpp index 50f34f17..311be11c 100644 --- a/client/rsh/src/compiler.cpp +++ b/client/rsh/src/compiler.cpp @@ -34,29 +34,30 @@ namespace rsh { SEXP RSH_JIT_FUN_PTR = Rf_install("RSH_JIT_FUN_PTR"); -static std::variant compile_closure(SEXP closure, CompilerOptions options) { +static std::variant +compile_closure(SEXP closure, CompilerOptions options) { // If a function has already been compiled to native code - if(Rf_asLogical(is_compiled(closure))) { + if (Rf_asLogical(is_compiled(closure))) { return "Function already compiled"; } std::vector closure_bytes; - if(IS_BYTECODE(BODY(closure))) { + if (IS_BYTECODE(BODY(closure))) { // Build the closure AST to get the correct hash // The AST is the first element in the constant pool of the BCODESXP SEXP body = BODY_EXPR(closure); auto ast_clos = Rf_mkCLOSXP(FORMALS(closure), body, CLOENV(closure)); closure_bytes = rsh::serialize(ast_clos); - } - else { - closure_bytes = rsh::serialize(closure); + } else { + closure_bytes = rsh::serialize(closure); } auto client = rsh::Client::get_client(); return client->remote_compile(closure_bytes, options); } -static void *insert_into_jit(const char* name, protocol::CompileResponse const &compiled_fun) { +static void *insert_into_jit(const char *name, + protocol::CompileResponse const &compiled_fun) { auto native_code = compiled_fun.code(); GJIT->add_object(native_code); auto ptr = GJIT->lookup(name); @@ -66,7 +67,6 @@ static void *insert_into_jit(const char* name, protocol::CompileResponse const & return ptr; } - CompilerOptions CompilerOptions::from_list(SEXP listsxp) { if (TYPEOF(listsxp) != VECSXP) { Rf_error("Expected a list of compiler options"); @@ -99,16 +99,15 @@ CompilerOptions CompilerOptions::from_list(SEXP listsxp) { } else if (!strcmp(name, "inplace")) { opts.inplace = vec_element_as_bool(listsxp, i, "inplace option must be a logical"); - } - else if(!strcmp(name, "tier")) { + } else if (!strcmp(name, "tier")) { SEXP tier_sxp = VECTOR_ELT(listsxp, i); - if(TYPEOF(tier_sxp) != STRSXP) { + if (TYPEOF(tier_sxp) != STRSXP) { Rf_error("Expected a string for the tier option"); } opts.tier = protocol::Tier::OPTIMIZED; std::string tier_s = CHAR(STRING_ELT(tier_sxp, 0)); protocol::Tier tier = protocol::Tier::OPTIMIZED; - if(tier_s == "bytecode") { + if (tier_s == "bytecode") { opts.tier = protocol::Tier::BASELINE; } } else { @@ -118,7 +117,6 @@ CompilerOptions CompilerOptions::from_list(SEXP listsxp) { return opts; } - std::string genSymbol(uint64_t hash, int index) { return "gen_" + std::to_string(hash) + "_" + std::to_string(index); @@ -144,61 +142,59 @@ SEXP compile(SEXP closure, SEXP options) { auto compiled_fun = std::get(response); // If the code is empty, we keep the SEXP - if(!compiled_fun.has_code() || compiled_fun.code().empty()) { - Rf_warning("Empty body returned for function %s. Most likely because of browser in the body", opts.name.c_str()); + if (!compiled_fun.has_code() || compiled_fun.code().empty()) { + Rf_warning("Empty body returned for function %s. Most likely because of " + "browser in the body", + opts.name.c_str()); return closure; } SEXP body = nullptr; SEXP c_cp = nullptr; - void* fun_ptr = nullptr; + void *fun_ptr = nullptr; SEXP fun_ptr_sxp = nullptr; std::string name = genSymbol(compiled_fun.hash(), 0); // Native or bytecode? - if(opts.tier == protocol::Tier::OPTIMIZED) { + if (opts.tier == protocol::Tier::OPTIMIZED) { fun_ptr = insert_into_jit(name.c_str(), compiled_fun); - auto fun_ptr_sxp = R_MakeExternalPtr( - fun_ptr, RSH_JIT_FUN_PTR, Rf_mkString(name.c_str())); - R_RegisterCFinalizerEx(fun_ptr_sxp, &jit_fun_destructor, FALSE); + auto fun_ptr_sxp = + R_MakeExternalPtr(fun_ptr, RSH_JIT_FUN_PTR, Rf_mkString(name.c_str())); + R_RegisterCFinalizerEx(fun_ptr_sxp, &jit_fun_destructor, FALSE); auto c_cp = rsh::deserialize(compiled_fun.constants()); - body = PROTECT(create_wrapper_body(closure, fun_ptr_sxp, c_cp));//P1 - } - else if(opts.tier == protocol::Tier::BASELINE) { + body = PROTECT(create_wrapper_body(closure, fun_ptr_sxp, c_cp)); // P1 + } else if (opts.tier == protocol::Tier::BASELINE) { body = PROTECT(rsh::deserialize(compiled_fun.code())); // P2 - if(TYPEOF(body) != BCODESXP) { + if (TYPEOF(body) != BCODESXP) { Rf_error("Expected bytecode, got %s", Rf_type2char(TYPEOF(body))); } } - // Inplace or not (i.e. through through an explicit call to `compile` or through the R JIT) + // Inplace or not (i.e. through through an explicit call to `compile` or + // through the R JIT) if (opts.inplace) { SET_BODY(closure, body); UNPROTECT(1); // For P1 or P2 // FIXME: add logging primitives - Rprintf("Compiled in place fun %s (fun=%p, body=%p) ; ", - opts.name.c_str(), closure, - body); - if(opts.tier == protocol::Tier::OPTIMIZED) { + Rprintf("Compiled in place fun %s (fun=%p, body=%p) ; ", opts.name.c_str(), + closure, body); + if (opts.tier == protocol::Tier::OPTIMIZED) { Rprintf("Jit-compiled: jit=%p\n", fun_ptr); - } - else { + } else { Rprintf("Bytecode-compiled\n"); } } else { SEXP orig = closure; - closure = PROTECT(Rf_mkCLOSXP(FORMALS(closure), body, CLOENV(closure))); // P3 + closure = + PROTECT(Rf_mkCLOSXP(FORMALS(closure), body, CLOENV(closure))); // P3 // FIXME: add logging primitive - UNPROTECT(1);// P1 or P2 - Rprintf( - "Replaced compiled fun %s -- %p (fun=%p, body=%p) ; ", - opts.name.c_str(), orig, closure, - body); - if(opts.tier == protocol::Tier::OPTIMIZED) { + UNPROTECT(1); // P1 or P2 + Rprintf("Replaced compiled fun %s -- %p (fun=%p, body=%p) ; ", + opts.name.c_str(), orig, closure, body); + if (opts.tier == protocol::Tier::OPTIMIZED) { Rprintf("Jit-compiled: jit=%p\n", fun_ptr); - } - else { + } else { Rprintf("Bytecode-compiled\n"); } } diff --git a/client/rsh/src/compiler.hpp b/client/rsh/src/compiler.hpp index fbf3e73f..39a6dcaa 100644 --- a/client/rsh/src/compiler.hpp +++ b/client/rsh/src/compiler.hpp @@ -1,8 +1,8 @@ // from some reason the #pragma once directive is not working #ifndef RSH_COMPILER_HPP #define RSH_COMPILER_HPP -#include "rsh.hpp" #include "messages.pb.h" +#include "rsh.hpp" #include namespace rsh { diff --git a/client/rsh/src/init.cpp b/client/rsh/src/init.cpp index e6d48eb8..1cc21dbf 100644 --- a/client/rsh/src/init.cpp +++ b/client/rsh/src/init.cpp @@ -1,7 +1,7 @@ #include -#include "compiler.hpp" #include "client.hpp" +#include "compiler.hpp" extern "C" { #include "bc2c/runtime.h" diff --git a/client/rsh/test6.R b/client/rsh/test6.R deleted file mode 100644 index ebdffd80..00000000 --- a/client/rsh/test6.R +++ /dev/null @@ -1,44 +0,0 @@ -rsh::rsh_jit_enable() -rsh::rsh_jit_disable() - - -cat("TEST 1\n") -f <- function(x) { - y <- x + 42 - y + x -} -print(f) -rsh::rsh_compile(f, list(tier = "bytecode", bc_opt = 3L)) -print(f) -print(f(5)) -stopifnot(f(5) == 52) - -cat("TEST 2\n") -# f should be in the compile cache now. -rsh::rsh_compile(f, list(tier = "bytecode", bc_opt = 3L)) -print(f) -.Internal(inspect(f)) -print(f(5)) -stopifnot(f(5) == 52) - -cat("TEST 3\n") -# another opt level so it should be recompiled -rsh::rsh_compile(f, list(tier = "bytecode", bc_opt = 2L)) -stopifnot(f(5) == 52) - - -cat("TEST 4\n") -f <- function(x) { - x + 3 -} - -# body changed so it should be recompiled -rsh::rsh_compile(f, list(tier = "bytecode", bc_opt = 2L)) -#str(compiler::disassemble(f)) -stopifnot(f(5) == 8) - -cat("TEST 5\n") -# Now native compilation -rsh::rsh_compile(f, list(tier = "native", bc_opt = 3L, cc_opt = 3L)) - -stopifnot(f(5) == 8) \ No newline at end of file diff --git a/external/R b/external/R index c950e70f..4ef05b1f 160000 --- a/external/R +++ b/external/R @@ -1 +1 @@ -Subproject commit c950e70f516502534d973992c56a72f6eea77358 +Subproject commit 4ef05b1ff56902d6774431933177b38a67515e67 diff --git a/server/src/main/java/org/prlprg/server/CompileService.java b/server/src/main/java/org/prlprg/server/CompileService.java index 7709e12a..346c74cd 100644 --- a/server/src/main/java/org/prlprg/server/CompileService.java +++ b/server/src/main/java/org/prlprg/server/CompileService.java @@ -249,9 +249,8 @@ public void init( // TODO: Lookup to see if we have this version of R installed or not. // Hardcoded so far: - var r_dir = Path.of("/usr/lib/R/"); - var lib_dir = "~/R/x86_64-pc-linux-gnu-library/4.3/"; - lib_dir = lib_dir.replaceFirst("^~", System.getProperty("user.home")); + var r_dir = Path.of("/workspace/external/R"); + var lib_dir = "/workspace/external/R/library"; session = new GNURSession(convertVersion(RVersion), r_dir, Path.of(lib_dir)); // TODO: Look into our cache if we have the packages.