From c9c076c75bc47f01f752ffc3fb4135f388e3cc95 Mon Sep 17 00:00:00 2001
From: Soojin Nam <jsunam@gmail.com>
Date: Tue, 26 Dec 2017 10:57:24 +0900
Subject: [PATCH 01/50] fix a trivial typo

---
 Makefile  |  2 +-
 README.md | 14 +++++++-------
 xxhash.c  |  4 ++--
 xxhash.h  | 16 ++++++++--------
 xxhsum.c  |  2 +-
 5 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/Makefile b/Makefile
index c352b515..3d8d5c74 100644
--- a/Makefile
+++ b/Makefile
@@ -124,7 +124,7 @@ test-mem: xxhsum
 
 .PHONY: test32
 test32: clean xxhsum32
-	@echo ---- test 32-bits ----
+	@echo ---- test 32-bit ----
 	./xxhsum32 -bi1 xxhash.c
 
 test-xxhsum-c: xxhsum
diff --git a/README.md b/README.md
index 5be77c13..743659eb 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ Code is highly portable, and hashes are identical on all platforms (little / big
 Benchmarks
 -------------------------
 
-The benchmark uses SMHasher speed test, compiled with Visual 2010 on a Windows Seven 32-bits box.
+The benchmark uses SMHasher speed test, compiled with Visual 2010 on a Windows Seven 32-bit box.
 The reference system uses a Core 2 Duo @3GHz
 
 
@@ -40,13 +40,13 @@ It depends on successfully passing SMHasher test set.
 Algorithms with a score < 5 are not listed on this table.
 
 A more recent version, XXH64, has been created thanks to [Mathias Westerdahl](https://github.com/JCash),
-which offers superior speed and dispersion for 64-bits systems.
-Note however that 32-bits applications will still run faster using the 32-bits version.
+which offers superior speed and dispersion for 64-bit systems.
+Note however that 32-bit applications will still run faster using the 32-bit version.
 
-SMHasher speed test, compiled using GCC 4.8.2, on Linux Mint 64-bits.
+SMHasher speed test, compiled using GCC 4.8.2, on Linux Mint 64-bit.
 The reference system uses a Core i5-3340M @2.7GHz
 
-| Version    | Speed on 64-bits | Speed on 32-bits |
+| Version    | Speed on 64-bit | Speed on 32-bit |
 |------------|------------------|------------------|
 | XXH64      | 13.8 GB/s        |  1.9 GB/s        |
 | XXH32      |  6.8 GB/s        |  6.0 GB/s        |
@@ -86,12 +86,12 @@ they modify xxhash behavior. They are all disabled by default.
 - `XXH_PRIVATE_API` : Make all functions `static`, directly accessible through `#include xxhash.h`, for inlining.
                       Do not compile `xxhash.c` as a separate module in this case.
 - `XXH_NO_LONG_LONG` : removes support for XXH64,
-                       useful for targets without 64-bits support.
+                       useful for targets without 64-bit support.
 
 
 ### Example
 
-Calling xxhash 64-bits variant from a C program :
+Calling xxhash 64-bit variant from a C program :
 
 ```
 #include "xxhash.h"
diff --git a/xxhash.c b/xxhash.c
index 63a11711..51f01a49 100644
--- a/xxhash.c
+++ b/xxhash.c
@@ -252,7 +252,7 @@ XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
 
 
 /* *******************************************************************
-*  32-bits hash functions
+*  32-bit hash functions
 *********************************************************************/
 static const U32 PRIME32_1 = 2654435761U;
 static const U32 PRIME32_2 = 2246822519U;
@@ -532,7 +532,7 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src
 #ifndef XXH_NO_LONG_LONG
 
 /* *******************************************************************
-*  64-bits hash functions
+*  64-bit hash functions
 *********************************************************************/
 
 /*======   Memory access   ======*/
diff --git a/xxhash.h b/xxhash.h
index 13136633..4099da02 100644
--- a/xxhash.h
+++ b/xxhash.h
@@ -57,8 +57,8 @@ Q.Score is a measure of quality of the hash function.
 It depends on successfully passing SMHasher test set.
 10 is a perfect score.
 
-A 64-bits version, named XXH64, is available since r35.
-It offers much better speed, but for 64-bits applications only.
+A 64-bit version, named XXH64, is available since r35.
+It offers much better speed, but for 64-bit applications only.
 Name     Speed on 64 bits    Speed on 32 bits
 XXH64       13.8 GB/s            1.9 GB/s
 XXH32        6.8 GB/s            6.0 GB/s
@@ -156,12 +156,12 @@ XXH_PUBLIC_API unsigned XXH_versionNumber (void);
 
 
 /*-**********************************************************************
-*  32-bits hash
+*  32-bit hash
 ************************************************************************/
 typedef unsigned int XXH32_hash_t;
 
 /*! XXH32() :
-    Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input".
+    Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input".
     The memory between input & input+length must be valid (allocated and read-accessible).
     "seed" can be used to alter the result predictably.
     Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
@@ -214,14 +214,14 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src
 
 #ifndef XXH_NO_LONG_LONG
 /*-**********************************************************************
-*  64-bits hash
+*  64-bit hash
 ************************************************************************/
 typedef unsigned long long XXH64_hash_t;
 
 /*! XXH64() :
-    Calculate the 64-bits hash of sequence of length "len" stored at memory address "input".
+    Calculate the 64-bit hash of sequence of length "len" stored at memory address "input".
     "seed" can be used to alter the result predictably.
-    This function runs faster on 64-bits systems, but slower on 32-bits systems (see benchmark).
+    This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
 */
 XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
 
@@ -267,7 +267,7 @@ struct XXH32_state_s {
    unsigned reserved;   /* never read nor write, will be removed in a future version */
 };   /* typedef'd to XXH32_state_t */
 
-#ifndef XXH_NO_LONG_LONG   /* remove 64-bits support */
+#ifndef XXH_NO_LONG_LONG   /* remove 64-bit support */
 struct XXH64_state_s {
    unsigned long long total_len;
    unsigned long long v1;
diff --git a/xxhsum.c b/xxhsum.c
index 656d9b14..fabd1d38 100644
--- a/xxhsum.c
+++ b/xxhsum.c
@@ -356,7 +356,7 @@ static void BMK_checkResult64(U64 r1, U64 r2)
 {
     static int nbTests = 1;
     if (r1!=r2) {
-        DISPLAY("\rERROR : Test%3i : 64-bits values non equals   !!!!!   \n", nbTests);
+        DISPLAY("\rERROR : Test%3i : 64-bit values non equals   !!!!!   \n", nbTests);
         DISPLAY("\r %08X%08X != %08X%08X \n", (U32)(r1>>32), (U32)r1, (U32)(r2>>32), (U32)r2);
         exit(1);
     }

From d2e9d125917a18eb6cc21089e12d9d6a7ee855bb Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Fri, 29 Dec 2017 11:15:37 +0100
Subject: [PATCH 02/50] disable auto-vectorization capability on OS-X clang

auto-vectorization was triggered for XXH32,
resulting in detrimental performance
---
 Makefile | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/Makefile b/Makefile
index 3d8d5c74..5c9b152d 100644
--- a/Makefile
+++ b/Makefile
@@ -33,10 +33,11 @@ LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT))
 LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT))
 LIBVER := $(LIBVER_MAJOR).$(LIBVER_MINOR).$(LIBVER_PATCH)
 
-CFLAGS ?= -O3
+CFLAGS ?= -O2 -mno-sse4   # ensure auto-vectorization is disabled
 CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
           -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
-		  -Wstrict-prototypes -Wundef
+          -Wstrict-prototypes -Wundef
+
 FLAGS   = $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(MOREFLAGS)
 XXHSUM_VERSION=$(LIBVER)
 MD2ROFF = ronn
@@ -67,7 +68,7 @@ LIBXXH = libxxhash.$(SHARED_EXT_VER)
 
 
 .PHONY: default
-default: lib xxhsum
+default: lib xxhsum_and_links
 
 .PHONY: all
 all: lib xxhsum xxhsum32 xxhsum_inlinedXXH
@@ -75,8 +76,11 @@ all: lib xxhsum xxhsum32 xxhsum_inlinedXXH
 xxhsum32: CFLAGS += -m32
 xxhsum xxhsum32: xxhash.c xxhsum.c
 	$(CC) $(FLAGS) $^ -o $@$(EXT)
-	ln -sf $@ xxh32sum
-	ln -sf $@ xxh64sum
+
+.PHONY: xxhsum_and_links
+xxhsum_and_links: xxhsum
+	ln -sf xxhsum xxh32sum
+	ln -sf xxhsum xxh64sum
 
 xxhsum_inlinedXXH: xxhsum.c
 	$(CC) $(FLAGS) -DXXH_PRIVATE_API $^ -o $@$(EXT)

From 744892b802dcf61a78a3f2f1311d542577c16d66 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Fri, 29 Dec 2017 14:13:18 +0100
Subject: [PATCH 03/50] added SSE4 detection module

The -mno-sse4 flag fails on ARM (and likely on any non-x86 architecture).
Only pass it when the compiler enables SSE4 by default (typically on OS-X).
---
 Makefile | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 5c9b152d..841f6f48 100644
--- a/Makefile
+++ b/Makefile
@@ -33,7 +33,15 @@ LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT))
 LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT))
 LIBVER := $(LIBVER_MAJOR).$(LIBVER_MINOR).$(LIBVER_PATCH)
 
-CFLAGS ?= -O2 -mno-sse4   # ensure auto-vectorization is disabled
+# SSE4 detection
+HAVE_SSE4 := $(shell $(CC) -dM -E - < /dev/null | grep "SSE4" > /dev/null && echo 1 || echo 0)
+ifeq ($(HAVE_SSE4), 1)
+NOSSE4 := -mno-sse4
+else
+NOSSE4 :=
+endif
+
+CFLAGS ?= -O2 $(NOSSE4)   # disables potential auto-vectorization
 CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
           -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
           -Wstrict-prototypes -Wundef

From 80a8a34dafed655545bdb2ae5ea6fb2eb0d18d6a Mon Sep 17 00:00:00 2001
From: Ben Boeckel <ben.boeckel@kitware.com>
Date: Thu, 8 Feb 2018 09:11:51 -0500
Subject: [PATCH 04/50] intel: do not use __attribute__((packed)) on Windows

On Windows, the Intel compiler is closer to MSVC rather than GCC and
does not support the GCC attribute syntax.
---
 xxhash.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xxhash.c b/xxhash.c
index 51f01a49..9c54c980 100644
--- a/xxhash.c
+++ b/xxhash.c
@@ -54,7 +54,7 @@
                         || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
                         || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
 #    define XXH_FORCE_MEMORY_ACCESS 2
-#  elif defined(__INTEL_COMPILER) || \
+#  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
   (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
                     || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
                     || defined(__ARM_ARCH_7S__) ))

From c39e4f91415c6908e0db6fc5d9d0fada9a0345f5 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Fri, 16 Feb 2018 18:58:40 -0800
Subject: [PATCH 05/50] benchmark display presents it/s

---
 xxhsum.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/xxhsum.c b/xxhsum.c
index fabd1d38..2a2f7dac 100644
--- a/xxhsum.c
+++ b/xxhsum.c
@@ -243,11 +243,15 @@ static void BMK_benchHash(hashFunction h, const char* hName, const void* buffer,
         if (r==0) DISPLAY(".\r");  /* need to do something with r to avoid compiler "optimizing" away hash function */
         {   double const timeS = ((double)BMK_clockSpan(cStart) / CLOCKS_PER_SEC) / nbHashes;
             if (timeS < fastestH) fastestH = timeS;
-            DISPLAY("%1i-%-17.17s : %10u -> %7.1f MB/s\r",
-                    iterationNb, hName, (U32)bufferSize, ((double)bufferSize / (1<<20)) / fastestH );
+            DISPLAY("%1i-%-17.17s : %10u -> %8.0f it/s (%7.1f MB/s) \r",
+                    iterationNb, hName, (U32)bufferSize,
+                    (double)1 / fastestH,
+                    ((double)bufferSize / (1<<20)) / fastestH );
         }
     }
-    DISPLAY("%-19.19s : %10u -> %7.1f MB/s  \n", hName, (U32)bufferSize, ((double)bufferSize / (1<<20)) / fastestH);
+    DISPLAY("%-19.19s : %10u -> %8.0f it/s (%7.1f MB/s)  \n", hName, (U32)bufferSize,
+        (double)1 / fastestH,
+        ((double)bufferSize / (1<<20)) / fastestH);
 }
 
 

From b1192827d72163f374f16ce5e2d3526e40e07c5f Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Sat, 17 Feb 2018 10:53:52 -0800
Subject: [PATCH 06/50] added option -q

makes benchmark results less verbose
for easier storage into file.
---
 xxhsum.c | 41 +++++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/xxhsum.c b/xxhsum.c
index 2a2f7dac..129a22c7 100644
--- a/xxhsum.c
+++ b/xxhsum.c
@@ -153,8 +153,8 @@ static const algoType g_defaultAlgo = algo_xxh64;    /* required within main() &
 **************************************/
 #define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
 #define DISPLAYRESULT(...)   fprintf(stdout, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) DISPLAY(__VA_ARGS__);
-static U32 g_displayLevel = 1;
+#define DISPLAYLEVEL(l, ...) do { if (g_displayLevel>=l) DISPLAY(__VA_ARGS__); } while (0)
+static int g_displayLevel = 2;
 
 
 /* ************************************
@@ -223,13 +223,13 @@ static void BMK_benchHash(hashFunction h, const char* hName, const void* buffer,
     U32 iterationNb;
     double fastestH = 100000000.;
 
-    DISPLAY("\r%79s\r", "");       /* Clean display line */
+    DISPLAYLEVEL(2, "\r%70s\r", "");       /* Clean display line */
     if (g_nbIterations<1) g_nbIterations=1;
     for (iterationNb = 1; iterationNb <= g_nbIterations; iterationNb++) {
         U32 nbHashes = 0, r=0;
         clock_t cStart;
 
-        DISPLAY("%1i-%-17.17s : %10u ->\r", iterationNb, hName, (U32)bufferSize);
+        DISPLAYLEVEL(2, "%1i-%-17.17s : %10u ->\r", iterationNb, hName, (U32)bufferSize);
         cStart = clock();
         while (clock() == cStart);   /* starts clock() at its exact beginning */
         cStart = clock();
@@ -240,16 +240,16 @@ static void BMK_benchHash(hashFunction h, const char* hName, const void* buffer,
                 r += h(buffer, bufferSize, i);
             nbHashes += nbh_perloop;
         }
-        if (r==0) DISPLAY(".\r");  /* need to do something with r to avoid compiler "optimizing" away hash function */
+        if (r==0) DISPLAYLEVEL(3,".\r");  /* need to do something with r to avoid compiler "optimizing" away hash function */
         {   double const timeS = ((double)BMK_clockSpan(cStart) / CLOCKS_PER_SEC) / nbHashes;
             if (timeS < fastestH) fastestH = timeS;
-            DISPLAY("%1i-%-17.17s : %10u -> %8.0f it/s (%7.1f MB/s) \r",
+            DISPLAYLEVEL(2, "%1i-%-17.17s : %10u -> %8.0f it/s (%7.1f MB/s) \r",
                     iterationNb, hName, (U32)bufferSize,
                     (double)1 / fastestH,
                     ((double)bufferSize / (1<<20)) / fastestH );
         }
     }
-    DISPLAY("%-19.19s : %10u -> %8.0f it/s (%7.1f MB/s)  \n", hName, (U32)bufferSize,
+    DISPLAY("%-19.19s : %10u -> %8.0f it/s (%7.1f MB/s) \n", hName, (U32)bufferSize,
         (double)1 / fastestH,
         ((double)bufferSize / (1<<20)) / fastestH);
 }
@@ -295,7 +295,7 @@ static int BMK_benchFiles(const char** fileNamesTable, int nbFiles)
 
         /* Checks */
         if ((inFile==NULL) || (inFileName==NULL)) {
-            DISPLAY( "Pb opening %s\n", inFileName);
+            DISPLAY("Pb opening %s\n", inFileName);
             free(buffer);
             return 11;
         }
@@ -336,7 +336,7 @@ static int BMK_benchInternal(void)
     }
 
     /* bench */
-    DISPLAY("\rSample of %u KB...        \n", (U32)(benchedSize >> 10));
+    DISPLAY("Sample of %u KB...        \n", (U32)(benchedSize >> 10));
     BMK_benchMem(buffer, benchedSize);
 
     free(buffer);
@@ -347,8 +347,9 @@ static int BMK_benchInternal(void)
 static void BMK_checkResult(U32 r1, U32 r2)
 {
     static int nbTests = 1;
-    if (r1==r2) DISPLAY("\rTest%3i : %08X == %08X   ok   ", nbTests, r1, r2);
-    else {
+    if (r1==r2) {
+        DISPLAYLEVEL(3, "\rTest%3i : %08X == %08X   ok   ", nbTests, r1, r2);
+    } else {
         DISPLAY("\rERROR : Test%3i : %08X <> %08X   !!!!!   \n", nbTests, r1, r2);
         exit(1);
     }
@@ -383,7 +384,8 @@ static void BMK_testSequence64(void* sentence, size_t len, U64 seed, U64 Nresult
     BMK_checkResult64(Dresult, Nresult);
 
     XXH64_reset(&state, seed);
-    for (pos=0; pos<len; pos++) XXH64_update(&state, ((char*)sentence)+pos, 1);
+    for (pos=0; pos<len; pos++)
+        XXH64_update(&state, ((char*)sentence)+pos, 1);
     Dresult = XXH64_digest(&state);
     BMK_checkResult64(Dresult, Nresult);
 }
@@ -404,7 +406,8 @@ static void BMK_testSequence(const void* sequence, size_t len, U32 seed, U32 Nre
     BMK_checkResult(Dresult, Nresult);
 
     XXH32_reset(&state, seed);
-    for (pos=0; pos<len; pos++) XXH32_update(&state, ((const char*)sequence)+pos, 1);
+    for (pos=0; pos<len; pos++)
+        XXH32_update(&state, ((const char*)sequence)+pos, 1);
     Dresult = XXH32_digest(&state);
     BMK_checkResult(Dresult, Nresult);
 }
@@ -441,8 +444,8 @@ static void BMK_sanityCheck(void)
     BMK_testSequence64(sanityBuffer, SANITY_BUFFER_SIZE, 0,     0x0EAB543384F878ADULL);
     BMK_testSequence64(sanityBuffer, SANITY_BUFFER_SIZE, prime, 0xCAA65939306F1E21ULL);
 
-    DISPLAY("\r%79s\r", "");       /* Clean display line */
-    DISPLAYLEVEL(2, "Sanity check -- all tests ok\n");
+    DISPLAYLEVEL(3, "\r%70s\r", "");       /* Clean display line */
+    DISPLAYLEVEL(3, "Sanity check -- all tests ok\n");
 }
 
 
@@ -1217,6 +1220,12 @@ int main(int argc, const char** argv)
                     g_sampleSize *= 10, g_sampleSize += argument[0]-'0', argument++;
                 break;
 
+            /* Modify verbosity of benchmark output (hidden option) */
+            case 'q':
+                argument++;
+                g_displayLevel--;
+                break;
+
             default:
                 return badusage(exename);
             }
@@ -1225,7 +1234,7 @@ int main(int argc, const char** argv)
 
     /* Check benchmark mode */
     if (benchmarkMode) {
-        DISPLAY( WELCOME_MESSAGE(exename) );
+        DISPLAYLEVEL(2, WELCOME_MESSAGE(exename) );
         BMK_sanityCheck();
         if (filenamesStart==0) return BMK_benchInternal();
         return BMK_benchFiles(argv+filenamesStart, argc-filenamesStart);

From 9a4fe59dda10aba3bf418cc1d116697cd29ee0f5 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Sun, 18 Feb 2018 16:56:14 -0800
Subject: [PATCH 07/50] improved timing for short inputs

by calling clock() (much) less often
---
 xxhsum.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/xxhsum.c b/xxhsum.c
index 129a22c7..3b72304f 100644
--- a/xxhsum.c
+++ b/xxhsum.c
@@ -49,11 +49,12 @@
 *  Includes
 **************************************/
 #include <stdlib.h>     /* malloc, calloc, free, exit */
-#include <stdio.h>      /* fprintf, fopen, ftello64, fread, stdin, stdout; when present : _fileno */
+#include <stdio.h>      /* fprintf, fopen, ftello64, fread, stdin, stdout, _fileno (when present) */
 #include <string.h>     /* strcmp */
-#include <sys/types.h>  /* stat64 */
-#include <sys/stat.h>   /* stat64 */
+#include <sys/types.h>  /* stat, stat64, _stat64 */
+#include <sys/stat.h>   /* stat, stat64, _stat64 */
 #include <time.h>       /* clock_t, clock, CLOCKS_PER_SEC */
+#include <assert.h>     /* assert */
 
 #define XXH_STATIC_LINKING_ONLY   /* *_state_t */
 #include "xxhash.h"
@@ -219,14 +220,14 @@ static U32 localXXH64(const void* buffer, size_t bufferSize, U32 seed) { return
 
 static void BMK_benchHash(hashFunction h, const char* hName, const void* buffer, size_t bufferSize)
 {
-    static const U32 nbh_perloop = 100;
+    U32 nbh_perIteration = ((300 MB) / (bufferSize+1)) + 1;  /* first loop conservatively aims for 300 MB/s */
     U32 iterationNb;
     double fastestH = 100000000.;
 
     DISPLAYLEVEL(2, "\r%70s\r", "");       /* Clean display line */
     if (g_nbIterations<1) g_nbIterations=1;
     for (iterationNb = 1; iterationNb <= g_nbIterations; iterationNb++) {
-        U32 nbHashes = 0, r=0;
+        U32 r=0;
         clock_t cStart;
 
         DISPLAYLEVEL(2, "%1i-%-17.17s : %10u ->\r", iterationNb, hName, (U32)bufferSize);
@@ -234,20 +235,20 @@ static void BMK_benchHash(hashFunction h, const char* hName, const void* buffer,
         while (clock() == cStart);   /* starts clock() at its exact beginning */
         cStart = clock();
 
-        while (BMK_clockSpan(cStart) < TIMELOOP) {
-            U32 i;
-            for (i=0; i<nbh_perloop; i++)
+        {   U32 i;
+            for (i=0; i<nbh_perIteration; i++)
                 r += h(buffer, bufferSize, i);
-            nbHashes += nbh_perloop;
         }
         if (r==0) DISPLAYLEVEL(3,".\r");  /* need to do something with r to avoid compiler "optimizing" away hash function */
-        {   double const timeS = ((double)BMK_clockSpan(cStart) / CLOCKS_PER_SEC) / nbHashes;
+        {   double const timeS = ((double)BMK_clockSpan(cStart) / CLOCKS_PER_SEC) / nbh_perIteration;
             if (timeS < fastestH) fastestH = timeS;
             DISPLAYLEVEL(2, "%1i-%-17.17s : %10u -> %8.0f it/s (%7.1f MB/s) \r",
                     iterationNb, hName, (U32)bufferSize,
                     (double)1 / fastestH,
                     ((double)bufferSize / (1<<20)) / fastestH );
         }
+        assert(fastestH > 2./1000000000);  /* avoid U32 overflow */
+        nbh_perIteration = (U32)(1 / fastestH) + 1;  /* adjust nbh_perIteration to last roughtly one second */
     }
     DISPLAY("%-19.19s : %10u -> %8.0f it/s (%7.1f MB/s) \n", hName, (U32)bufferSize,
         (double)1 / fastestH,

From deb2d0be22df001e6a3663f7f14b8fd885ef7954 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Sun, 18 Feb 2018 17:38:48 -0800
Subject: [PATCH 08/50] can select to benchmark one specific test

new hidden command -b#
---
 xxhsum.c | 102 +++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 73 insertions(+), 29 deletions(-)

diff --git a/xxhsum.c b/xxhsum.c
index 3b72304f..a94f9209 100644
--- a/xxhsum.c
+++ b/xxhsum.c
@@ -125,16 +125,17 @@ static const char author[] = "Yann Collet";
 #define WELCOME_MESSAGE(exename) "%s %s (%i-bits %s), by %s \n", \
                     exename, PROGRAM_VERSION, g_nbBits, ENDIAN_NAME, author
 
+#define KB *( 1<<10)
+#define MB *( 1<<20)
+#define GB *(1U<<30)
+
+static size_t XXH_DEFAULT_SAMPLE_SIZE = 100 KB;
 #define NBLOOPS    3                              /* Default number of benchmark iterations */
 #define TIMELOOP_S 1
 #define TIMELOOP  (TIMELOOP_S * CLOCKS_PER_SEC)   /* Minimum timing per iteration */
 #define XXHSUM32_DEFAULT_SEED 0                   /* Default seed for algo_xxh32 */
 #define XXHSUM64_DEFAULT_SEED 0                   /* Default seed for algo_xxh64 */
 
-#define KB *( 1<<10)
-#define MB *( 1<<20)
-#define GB *(1U<<30)
-
 #define MAX_MEM    (2 GB - 64 MB)
 
 static const char stdinName[] = "-";
@@ -161,7 +162,6 @@ static int g_displayLevel = 2;
 /* ************************************
 *  Local variables
 **************************************/
-static size_t g_sampleSize = 100 KB;
 static U32 g_nbIterations = NBLOOPS;
 
 
@@ -256,20 +256,35 @@ static void BMK_benchHash(hashFunction h, const char* hName, const void* buffer,
 }
 
 
-/* Note : buffer is supposed malloc'ed, hence aligned */
-static void BMK_benchMem(const void* buffer, size_t bufferSize)
+/* BMK_benchMem():
+ * specificTest : 0 == run all tests, 1-4 == run only that test (XXH32, XXH32 unaligned, XXH64, XXH64 unaligned)
+ * buffer : must be 8-bytes aligned (if malloc'ed, it should be), with bufferSize+3 readable bytes (unaligned tests)
+ * @return : 0 on success, 1 if error (invalid mode selected : specificTest > 4, no test is run) */
+static int BMK_benchMem(const void* buffer, size_t bufferSize, U32 specificTest)
 {
+    assert((((size_t)buffer) & 7) == 0);  /* ensure 8-bytes alignment : the 3 lowest address bits must be 0 */
+
     /* XXH32 bench */
-    BMK_benchHash(localXXH32, "XXH32", buffer, bufferSize);
+    if ((specificTest==0) | (specificTest==1))
+        BMK_benchHash(localXXH32, "XXH32", buffer, bufferSize);
 
     /* Bench XXH32 on Unaligned input */
-    BMK_benchHash(localXXH32, "XXH32 unaligned", ((const char*)buffer)+1, bufferSize);
+    if ((specificTest==0) | (specificTest==2))
+        BMK_benchHash(localXXH32, "XXH32 unaligned", ((const char*)buffer)+1, bufferSize);
 
     /* Bench XXH64 */
-    BMK_benchHash(localXXH64, "XXH64", buffer, bufferSize);
+    if ((specificTest==0) | (specificTest==3))
+        BMK_benchHash(localXXH64, "XXH64", buffer, bufferSize);
 
     /* Bench XXH64 on Unaligned input */
-    BMK_benchHash(localXXH64, "XXH64 unaligned", ((const char*)buffer)+3, bufferSize);
+    if ((specificTest==0) | (specificTest==4))
+        BMK_benchHash(localXXH64, "XXH64 unaligned", ((const char*)buffer)+3, bufferSize);
+
+    if (specificTest > 4) {
+        DISPLAY("benchmark mode invalid \n");
+        return 1;
+    }
+    return 0;
 }
 
 
@@ -284,9 +299,11 @@ static size_t BMK_selectBenchedSize(const char* fileName)
 }
 
 
-static int BMK_benchFiles(const char** fileNamesTable, int nbFiles)
+static int BMK_benchFiles(const char** fileNamesTable, int nbFiles, U32 specificTest)
 {
+    int result = 0;
     int fileIdx;
+
     for (fileIdx=0; fileIdx<nbFiles; fileIdx++) {
         const char* const inFileName = fileNamesTable[fileIdx];
         FILE* const inFile = fopen( inFileName, "rb" );
@@ -317,31 +334,37 @@ static int BMK_benchFiles(const char** fileNamesTable, int nbFiles)
         }   }
 
         /* bench */
-        BMK_benchMem(alignedBuffer, benchedSize);
+        result |= BMK_benchMem(alignedBuffer, benchedSize, specificTest);
 
         free(buffer);
     }
 
-    return 0;
+    return result;
 }
 
 
 
-static int BMK_benchInternal(void)
+static int BMK_benchInternal(size_t keySize, int specificTest)
 {
-    size_t const benchedSize = g_sampleSize;
-    void* const buffer = calloc(benchedSize+3, 1);
+    void* const buffer = calloc(keySize+3, 1);
     if(!buffer) {
         DISPLAY("\nError: not enough memory!\n");
         return 12;
     }
 
     /* bench */
-    DISPLAY("Sample of %u KB...        \n", (U32)(benchedSize >> 10));
-    BMK_benchMem(buffer, benchedSize);
+    DISPLAY("Sample of ");
+    if (keySize > 10 KB) {
+        DISPLAY("%u KB", (U32)(keySize >> 10));
+    } else {
+        DISPLAY("%u bytes", (U32)keySize);
+    }
+    DISPLAY("...        \n");
 
-    free(buffer);
-    return 0;
+    {   int const result = BMK_benchMem(buffer, keySize, specificTest);
+        free(buffer);
+        return result;
+    }
 }
 
 
@@ -1133,6 +1156,26 @@ static int badusage(const char* exename)
     return 1;
 }
 
+/*! readU32FromChar() :
+   @return : unsigned integer value read from input in `char` format,
+             0 if there is no digit at *stringPtr position.
+    Interprets K, KB, KiB, M, MB and MiB suffix (value is multiplied by 2^10 or 2^20).
+    Modifies `*stringPtr`, advancing it to position where reading stopped.
+    Note : function result can overflow if digit string > MAX_UINT */
+static unsigned readU32FromChar(const char** stringPtr)
+{
+    unsigned result = 0;
+    while ((**stringPtr >='0') && (**stringPtr <='9'))
+        result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;   /* accumulate decimal digits */
+    if ((**stringPtr=='K') || (**stringPtr=='M')) {
+        result <<= 10;   /* K : x 1024 */
+        if (**stringPtr=='M') result <<= 10;   /* M : x 1024 once more */
+        (*stringPtr)++ ;
+        if (**stringPtr=='i') (*stringPtr)++;   /* skip optional 'i' (KiB, MiB) */
+        if (**stringPtr=='B') (*stringPtr)++;   /* skip optional 'B' */
+    }
+    return result;
+}
 
 int main(int argc, const char** argv)
 {
@@ -1144,6 +1187,8 @@ int main(int argc, const char** argv)
     U32 statusOnly    = 0;
     U32 warn          = 0;
     U32 quiet         = 0;
+    U32 specificTest  = 0;
+    size_t keySize    = XXH_DEFAULT_SAMPLE_SIZE;
     algoType algo = g_defaultAlgo;
     endianess displayEndianess = big_endian;
 
@@ -1204,21 +1249,20 @@ int main(int argc, const char** argv)
             /* Trigger benchmark mode */
             case 'b':
                 argument++;
-                benchmarkMode=1;
+                benchmarkMode = 1;
+                specificTest = readU32FromChar(&argument);   /* can select one specific benchmark test (hidden option) */
                 break;
 
             /* Modify Nb Iterations (benchmark only) */
             case 'i':
-                g_nbIterations = argument[1] - '0';
-                argument+=2;
+                argument++;
+                g_nbIterations = readU32FromChar(&argument);
                 break;
 
             /* Modify Block size (benchmark only) */
             case 'B':
                 argument++;
-                g_sampleSize = 0;
-                while (argument[0]>='0' && argument[0]<='9')
-                    g_sampleSize *= 10, g_sampleSize += argument[0]-'0', argument++;
+                keySize = readU32FromChar(&argument);
                 break;
 
             /* Modify verbosity of benchmark output (hidden option) */
@@ -1237,8 +1281,8 @@ int main(int argc, const char** argv)
     if (benchmarkMode) {
         DISPLAYLEVEL(2, WELCOME_MESSAGE(exename) );
         BMK_sanityCheck();
-        if (filenamesStart==0) return BMK_benchInternal();
-        return BMK_benchFiles(argv+filenamesStart, argc-filenamesStart);
+        if (filenamesStart==0) return BMK_benchInternal(keySize, specificTest);
+        return BMK_benchFiles(argv+filenamesStart, argc-filenamesStart, specificTest);
     }
 
     /* Check if input is defined as console; trigger an error in this case */

From 51edcaba406ad2f430ab6ccab19bb81e96528f5a Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Sun, 18 Feb 2018 17:54:58 -0800
Subject: [PATCH 09/50] added special mode 0 (-qq) for benchmark

only output the result in it/s with a comma
for later retrieval as .csv file
---
 xxhsum.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/xxhsum.c b/xxhsum.c
index a94f9209..8de3a3d4 100644
--- a/xxhsum.c
+++ b/xxhsum.c
@@ -250,9 +250,11 @@ static void BMK_benchHash(hashFunction h, const char* hName, const void* buffer,
         assert(fastestH > 2./1000000000);  /* avoid U32 overflow */
         nbh_perIteration = (U32)(1 / fastestH) + 1;  /* adjust nbh_perIteration to last roughtly one second */
     }
-    DISPLAY("%-19.19s : %10u -> %8.0f it/s (%7.1f MB/s) \n", hName, (U32)bufferSize,
+    DISPLAYLEVEL(1, "%-19.19s : %10u -> %8.0f it/s (%7.1f MB/s) \n", hName, (U32)bufferSize,
         (double)1 / fastestH,
         ((double)bufferSize / (1<<20)) / fastestH);
+    if (g_displayLevel<1)
+        DISPLAYLEVEL(0, "%u, ", (U32)((double)1 / fastestH));
 }
 
 
@@ -324,7 +326,7 @@ static int BMK_benchFiles(const char** fileNamesTable, int nbFiles, U32 specific
         }
 
         /* Fill input buffer */
-        DISPLAY("\rLoading %s...        \n", inFileName);
+        DISPLAYLEVEL(1, "\rLoading %s...        \n", inFileName);
         {   size_t const readSize = fread(alignedBuffer, 1, benchedSize, inFile);
             fclose(inFile);
             if(readSize != benchedSize) {
@@ -353,13 +355,13 @@ static int BMK_benchInternal(size_t keySize, int specificTest)
     }
 
     /* bench */
-    DISPLAY("Sample of ");
+    DISPLAYLEVEL(1, "Sample of ");
     if (keySize > 10 KB) {
-        DISPLAY("%u KB", (U32)(keySize >> 10));
+        DISPLAYLEVEL(1, "%u KB", (U32)(keySize >> 10));
     } else {
-        DISPLAY("%u bytes", (U32)keySize);
+        DISPLAYLEVEL(1, "%u bytes", (U32)keySize);
     }
-    DISPLAY("...        \n");
+    DISPLAYLEVEL(1, "...        \n");
 
     {   int const result = BMK_benchMem(buffer, keySize, specificTest);
         free(buffer);

From b1175fec7695aea92dab7244c09bc88a030590c8 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Mon, 19 Feb 2018 01:06:07 -0800
Subject: [PATCH 10/50] added make list target

---
 Makefile | 4 ++++
 xxhsum.c | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 841f6f48..92220010 100644
--- a/Makefile
+++ b/Makefile
@@ -224,6 +224,10 @@ clean: clean-xxhsum-c
 #-----------------------------------------------------------------------------
 ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU OpenBSD FreeBSD NetBSD DragonFly SunOS))
 
+.PHONY: list
+list:
+	@$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | egrep -v -e '^[^[:alnum:]]' -e '^$@$$' | xargs
+
 DESTDIR     ?=
 # directory variables : GNU conventions prefer lowercase
 # see https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html
diff --git a/xxhsum.c b/xxhsum.c
index 8de3a3d4..22040112 100644
--- a/xxhsum.c
+++ b/xxhsum.c
@@ -239,7 +239,7 @@ static void BMK_benchHash(hashFunction h, const char* hName, const void* buffer,
             for (i=0; i<nbh_perIteration; i++)
                 r += h(buffer, bufferSize, i);
         }
-        if (r==0) DISPLAYLEVEL(3,".\r");  /* need to do something with r to avoid compiler "optimizing" away hash function */
+        if (r==0) DISPLAYLEVEL(3,".\r");  /* do something with r to avoid compiler "optimizing" away hash function */
         {   double const timeS = ((double)BMK_clockSpan(cStart) / CLOCKS_PER_SEC) / nbh_perIteration;
             if (timeS < fastestH) fastestH = timeS;
             DISPLAYLEVEL(2, "%1i-%-17.17s : %10u -> %8.0f it/s (%7.1f MB/s) \r",
@@ -1252,7 +1252,7 @@ int main(int argc, const char** argv)
             case 'b':
                 argument++;
                 benchmarkMode = 1;
-                specificTest = readU32FromChar(&argument);   /* can select one specific benchmark test (hidden option) */
+                specificTest = readU32FromChar(&argument);   /* select one specific test (hidden option) */
                 break;
 
             /* Modify Nb Iterations (benchmark only) */

From 81118ba2f446c8f2a7ef4ee28637527be918dbc9 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Mon, 19 Feb 2018 01:36:09 -0800
Subject: [PATCH 11/50] fixed assert() ensuring one-hash time evaluation >
 1/2Billions

so that 1/time doesn't overflow U32
---
 xxhsum.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xxhsum.c b/xxhsum.c
index 22040112..25f7bb85 100644
--- a/xxhsum.c
+++ b/xxhsum.c
@@ -247,7 +247,7 @@ static void BMK_benchHash(hashFunction h, const char* hName, const void* buffer,
                     (double)1 / fastestH,
                     ((double)bufferSize / (1<<20)) / fastestH );
         }
-        assert(fastestH > 2./1000000000);  /* avoid U32 overflow */
+        assert(fastestH > 1./2000000000);  /* avoid U32 overflow */
         nbh_perIteration = (U32)(1 / fastestH) + 1;  /* adjust nbh_perIteration to last roughtly one second */
     }
     DISPLAYLEVEL(1, "%-19.19s : %10u -> %8.0f it/s (%7.1f MB/s) \n", hName, (U32)bufferSize,

From 3bb5bb64baff1035d562b601eec3e50f8313e1e1 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Mon, 19 Feb 2018 20:02:50 -0800
Subject: [PATCH 12/50] changed XXH32 finalization to use switch()/case
 strategy

proposed by Jens Bauer

Improves performance for many small keys
(especially when not power of 2).
Small advantage (+5-10%) with small keys of unpredictable length.
---
 xxhash.c | 145 ++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 95 insertions(+), 50 deletions(-)

diff --git a/xxhash.c b/xxhash.c
index 51f01a49..a8c24922 100644
--- a/xxhash.c
+++ b/xxhash.c
@@ -111,6 +111,8 @@ static void  XXH_free  (void* p)  { free(p); }
 #include <string.h>
 static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
 
+#include <assert.h>   /* assert */
+
 #define XXH_STATIC_LINKING_ONLY
 #include "xxhash.h"
 
@@ -215,8 +217,12 @@ typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
 
 /* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
 #ifndef XXH_CPU_LITTLE_ENDIAN
-    static const int g_one = 1;
-#   define XXH_CPU_LITTLE_ENDIAN   (*(const char*)(&g_one))
+static int XXH_isLittleEndian(void)
+{
+    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental  */
+    return one.c[0];
+}
+#   define XXH_CPU_LITTLE_ENDIAN   XXH_isLittleEndian()
 #endif
 
 
@@ -268,12 +274,87 @@ static U32 XXH32_round(U32 seed, U32 input)
     return seed;
 }
 
-FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
+/* mix all bits */
+static U32 XXH32_avalanche(U32 h32)
+{
+    h32 ^= h32 >> 15;
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+    return(h32);
+}
+
+#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
+
+static U32
+XXH32_finalize(U32 h32, const void* ptr, size_t len,
+                XXH_endianess endian, XXH_alignment align)
+
+{
+    const BYTE* p = (const BYTE*)ptr;
+#define PROCESS1             \
+    h32 += (*p) * PRIME32_5; \
+    p++;                     \
+    h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
+
+#define PROCESS4                         \
+    h32 += XXH_get32bits(p) * PRIME32_3; \
+    p+=4;                                \
+    h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
+
+    switch(len&15)  /* or switch(bEnd - p) */
+    {
+      case 12:      PROCESS4;
+                    /* fallthrough */
+      case 8:       PROCESS4;
+                    /* fallthrough */
+      case 4:       PROCESS4;
+                    return XXH32_avalanche(h32);
+
+      case 13:      PROCESS4;
+                    /* fallthrough */
+      case 9:       PROCESS4;
+                    /* fallthrough */
+      case 5:       PROCESS4;
+                    PROCESS1;
+                    return XXH32_avalanche(h32);
+
+      case 14:      PROCESS4;
+                    /* fallthrough */
+      case 10:      PROCESS4;
+                    /* fallthrough */
+      case 6:       PROCESS4;
+                    PROCESS1;
+                    PROCESS1;
+                    return XXH32_avalanche(h32);
+
+      case 15:      PROCESS4;
+                    /* fallthrough */
+      case 11:      PROCESS4;
+                    /* fallthrough */
+      case 7:       PROCESS4;
+                    /* fallthrough */
+      case 3:       PROCESS1;
+                    /* fallthrough */
+      case 2:       PROCESS1;
+                    /* fallthrough */
+      case 1:       PROCESS1;
+                    /* fallthrough */
+      case 0:       return XXH32_avalanche(h32);
+    }
+    assert(0);
+    return h32;   /* reaching this point is deemed impossible */
+}
+
+
+FORCE_INLINE U32
+XXH32_endian_align(const void* input, size_t len, U32 seed,
+                    XXH_endianess endian, XXH_alignment align)
 {
     const BYTE* p = (const BYTE*)input;
     const BYTE* bEnd = p + len;
     U32 h32;
-#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
 
 #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
     if (p==NULL) {
@@ -283,7 +364,7 @@ FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH
 #endif
 
     if (len>=16) {
-        const BYTE* const limit = bEnd - 16;
+        const BYTE* const limit = bEnd - 15;
         U32 v1 = seed + PRIME32_1 + PRIME32_2;
         U32 v2 = seed + PRIME32_2;
         U32 v3 = seed + 0;
@@ -294,34 +375,17 @@ FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH
             v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4;
             v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4;
             v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4;
-        } while (p<=limit);
+        } while (p < limit);
 
-        h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
+        h32 = XXH_rotl32(v1, 1)  + XXH_rotl32(v2, 7)
+            + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
     } else {
         h32  = seed + PRIME32_5;
     }
 
-    h32 += (U32) len;
-
-    while (p+4<=bEnd) {
-        h32 += XXH_get32bits(p) * PRIME32_3;
-        h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
-        p+=4;
-    }
-
-    while (p<bEnd) {
-        h32 += (*p) * PRIME32_5;
-        h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
-        p++;
-    }
-
-    h32 ^= h32 >> 15;
-    h32 *= PRIME32_2;
-    h32 ^= h32 >> 13;
-    h32 *= PRIME32_3;
-    h32 ^= h32 >> 16;
+    h32 += (U32)len;
 
-    return h32;
+    return XXH32_finalize(h32, p, len&15, endian, align);
 }
 
 
@@ -446,6 +510,7 @@ XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size
     return XXH_OK;
 }
 
+
 XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
 {
     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
@@ -457,11 +522,9 @@ XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void*
 }
 
 
-
-FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
+FORCE_INLINE U32
+XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
 {
-    const BYTE * p = (const BYTE*)state->mem32;
-    const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize;
     U32 h32;
 
     if (state->large_len) {
@@ -475,25 +538,7 @@ FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess
 
     h32 += state->total_len_32;
 
-    while (p+4<=bEnd) {
-        h32 += XXH_readLE32(p, endian) * PRIME32_3;
-        h32  = XXH_rotl32(h32, 17) * PRIME32_4;
-        p+=4;
-    }
-
-    while (p<bEnd) {
-        h32 += (*p) * PRIME32_5;
-        h32  = XXH_rotl32(h32, 11) * PRIME32_1;
-        p++;
-    }
-
-    h32 ^= h32 >> 15;
-    h32 *= PRIME32_2;
-    h32 ^= h32 >> 13;
-    h32 *= PRIME32_3;
-    h32 ^= h32 >> 16;
-
-    return h32;
+    return XXH32_finalize(h32, state->mem32, state->memsize, endian, XXH_aligned);
 }
 
 

From 0e5dab97db33b84dc21bbede3b31fa3ad5954ad8 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Feb 2018 15:45:03 -0800
Subject: [PATCH 13/50] converted XXH64 finalization to switch()/case

based on previous XXH32 finalization suggested by Jens Bauer.
---
 xxhash.c | 161 +++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 104 insertions(+), 57 deletions(-)

diff --git a/xxhash.c b/xxhash.c
index 42d03b15..0841c8f1 100644
--- a/xxhash.c
+++ b/xxhash.c
@@ -684,12 +684,111 @@ static U64 XXH64_mergeRound(U64 acc, U64 val)
     return acc;
 }
 
-FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
+static U64 XXH64_avalanche(U64 h64)
+{
+    h64 ^= h64 >> 33;
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+    return h64;
+}
+
+
+#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
+
+static U64
+XXH64_finalize(U64 h64, const void* ptr, size_t len,
+               XXH_endianess endian, XXH_alignment align)
+{
+    const BYTE* p = (const BYTE*)ptr;
+
+#define PROCESS1_64          \
+    h64 ^= (*p) * PRIME64_5; \
+    p++;                     \
+    h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+
+#define PROCESS4_64          \
+    h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; \
+    p+=4;                    \
+    h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+
+#define PROCESS8_64 {        \
+    U64 const k1 = XXH64_round(0, XXH_get64bits(p)); \
+    p+=8;                    \
+    h64 ^= k1;               \
+    h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \
+}
+
+    switch(len&31) {
+      case 24: PROCESS8_64;
+      case 16: PROCESS8_64;
+      case  8: PROCESS8_64;
+               return XXH64_avalanche(h64);
+
+      case 28: PROCESS8_64;
+      case 20: PROCESS8_64;
+      case 12: PROCESS8_64;
+      case  4: PROCESS4_64;
+               return XXH64_avalanche(h64);
+
+      case 25: PROCESS8_64;
+      case 17: PROCESS8_64;
+      case  9: PROCESS8_64;
+               PROCESS1_64;
+               return XXH64_avalanche(h64);
+
+      case 29: PROCESS8_64;
+      case 21: PROCESS8_64;
+      case 13: PROCESS8_64;
+      case  5: PROCESS4_64;
+               PROCESS1_64;
+               return XXH64_avalanche(h64);
+
+      case 26: PROCESS8_64;
+      case 18: PROCESS8_64;
+      case 10: PROCESS8_64;
+               PROCESS1_64;
+               PROCESS1_64;
+               return XXH64_avalanche(h64);
+
+      case 30: PROCESS8_64;
+      case 22: PROCESS8_64;
+      case 14: PROCESS8_64;
+      case  6: PROCESS4_64;
+               PROCESS1_64;
+               PROCESS1_64;
+               return XXH64_avalanche(h64);
+
+      case 27: PROCESS8_64;
+      case 19: PROCESS8_64;
+      case 11: PROCESS8_64;
+               PROCESS1_64;
+               PROCESS1_64;
+               PROCESS1_64;
+               return XXH64_avalanche(h64);
+
+      case 31: PROCESS8_64;
+      case 23: PROCESS8_64;
+      case 15: PROCESS8_64;
+      case  7: PROCESS4_64;
+      case  3: PROCESS1_64;
+      case  2: PROCESS1_64;
+      case  1: PROCESS1_64;
+      case  0: return XXH64_avalanche(h64);
+    }
+    /* impossible to reach */
+    assert(0);
+
+}
+
+FORCE_INLINE U64
+XXH64_endian_align(const void* input, size_t len, U64 seed,
+                XXH_endianess endian, XXH_alignment align)
 {
     const BYTE* p = (const BYTE*)input;
     const BYTE* bEnd = p + len;
     U64 h64;
-#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
 
 #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
     if (p==NULL) {
@@ -724,32 +823,7 @@ FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH
 
     h64 += (U64) len;
 
-    while (p+8<=bEnd) {
-        U64 const k1 = XXH64_round(0, XXH_get64bits(p));
-        h64 ^= k1;
-        h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
-        p+=8;
-    }
-
-    if (p+4<=bEnd) {
-        h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
-        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
-        p+=4;
-    }
-
-    while (p<bEnd) {
-        h64 ^= (*p) * PRIME64_5;
-        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
-        p++;
-    }
-
-    h64 ^= h64 >> 33;
-    h64 *= PRIME64_2;
-    h64 ^= h64 >> 29;
-    h64 *= PRIME64_3;
-    h64 ^= h64 >> 32;
-
-    return h64;
+    return XXH64_finalize(h64, p, len, endian, align);
 }
 
 
@@ -880,8 +954,6 @@ XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void*
 
 FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
 {
-    const BYTE * p = (const BYTE*)state->mem64;
-    const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize;
     U64 h64;
 
     if (state->total_len >= 32) {
@@ -896,37 +968,12 @@ FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess
         h64 = XXH64_mergeRound(h64, v3);
         h64 = XXH64_mergeRound(h64, v4);
     } else {
-        h64  = state->v3 + PRIME64_5;
+        h64  = state->v3 /*seed*/ + PRIME64_5;
     }
 
     h64 += (U64) state->total_len;
 
-    while (p+8<=bEnd) {
-        U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian));
-        h64 ^= k1;
-        h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
-        p+=8;
-    }
-
-    if (p+4<=bEnd) {
-        h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
-        h64  = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
-        p+=4;
-    }
-
-    while (p<bEnd) {
-        h64 ^= (*p) * PRIME64_5;
-        h64  = XXH_rotl64(h64, 11) * PRIME64_1;
-        p++;
-    }
-
-    h64 ^= h64 >> 33;
-    h64 *= PRIME64_2;
-    h64 ^= h64 >> 29;
-    h64 *= PRIME64_3;
-    h64 ^= h64 >> 32;
-
-    return h64;
+    return XXH64_finalize(h64, state->mem64, state->total_len, endian, XXH_aligned);
 }
 
 XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)

From 921182349a87760bfc016dcd54fa3136d4c8150d Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Feb 2018 16:00:47 -0800
Subject: [PATCH 14/50] fixed minor conversion warning

---
 xxhash.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xxhash.c b/xxhash.c
index 0841c8f1..a7ba9e83 100644
--- a/xxhash.c
+++ b/xxhash.c
@@ -973,7 +973,7 @@ FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess
 
     h64 += (U64) state->total_len;
 
-    return XXH64_finalize(h64, state->mem64, state->total_len, endian, XXH_aligned);
+    return XXH64_finalize(h64, state->mem64, (size_t)state->total_len, endian, XXH_aligned);
 }
 
 XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)

From 3b589804fcd0379d652c405dabce5d049a10c918 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 13 Mar 2018 15:39:03 -0700
Subject: [PATCH 15/50] added fall-through statements for gcc-7

---
 xxhash.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/xxhash.c b/xxhash.c
index a7ba9e83..f14d4b36 100644
--- a/xxhash.c
+++ b/xxhash.c
@@ -722,46 +722,63 @@ XXH64_finalize(U64 h64, const void* ptr, size_t len,
 
     switch(len&31) {
       case 24: PROCESS8_64;
+                    /* fallthrough */
       case 16: PROCESS8_64;
+                    /* fallthrough */
       case  8: PROCESS8_64;
                return XXH64_avalanche(h64);
 
       case 28: PROCESS8_64;
+                    /* fallthrough */
       case 20: PROCESS8_64;
+                    /* fallthrough */
       case 12: PROCESS8_64;
+                    /* fallthrough */
       case  4: PROCESS4_64;
                return XXH64_avalanche(h64);
 
       case 25: PROCESS8_64;
+                    /* fallthrough */
       case 17: PROCESS8_64;
+                    /* fallthrough */
       case  9: PROCESS8_64;
                PROCESS1_64;
                return XXH64_avalanche(h64);
 
       case 29: PROCESS8_64;
+                    /* fallthrough */
       case 21: PROCESS8_64;
+                    /* fallthrough */
       case 13: PROCESS8_64;
+                    /* fallthrough */
       case  5: PROCESS4_64;
                PROCESS1_64;
                return XXH64_avalanche(h64);
 
       case 26: PROCESS8_64;
+                    /* fallthrough */
       case 18: PROCESS8_64;
+                    /* fallthrough */
       case 10: PROCESS8_64;
                PROCESS1_64;
                PROCESS1_64;
                return XXH64_avalanche(h64);
 
       case 30: PROCESS8_64;
+                    /* fallthrough */
       case 22: PROCESS8_64;
+                    /* fallthrough */
       case 14: PROCESS8_64;
+                    /* fallthrough */
       case  6: PROCESS4_64;
                PROCESS1_64;
                PROCESS1_64;
                return XXH64_avalanche(h64);
 
       case 27: PROCESS8_64;
+                    /* fallthrough */
       case 19: PROCESS8_64;
+                    /* fallthrough */
       case 11: PROCESS8_64;
                PROCESS1_64;
                PROCESS1_64;
@@ -769,12 +786,19 @@ XXH64_finalize(U64 h64, const void* ptr, size_t len,
                return XXH64_avalanche(h64);
 
       case 31: PROCESS8_64;
+                    /* fallthrough */
       case 23: PROCESS8_64;
+                    /* fallthrough */
       case 15: PROCESS8_64;
+                    /* fallthrough */
       case  7: PROCESS4_64;
+                    /* fallthrough */
       case  3: PROCESS1_64;
+                    /* fallthrough */
       case  2: PROCESS1_64;
+                    /* fallthrough */
       case  1: PROCESS1_64;
+                    /* fallthrough */
       case  0: return XXH64_avalanche(h64);
     }
     /* impossible to reach */

From 9231e6d8ed60e4818cd8a9e3d361d06a19dc45dc Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 13 Mar 2018 15:52:21 -0700
Subject: [PATCH 16/50] added build macro XXH_INLINE_ALL

same as XXH_PRIVATE_API
but insists on the performance improvement side.

also : bumped version number to v0.6.5
---
 README.md | 15 ++++++++++-----
 xxhash.h  | 13 +++++++------
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 743659eb..68b4a9a1 100644
--- a/README.md
+++ b/README.md
@@ -66,6 +66,13 @@ The utility `xxhsum` is GPL licensed.
 The following macros can be set at compilation time,
 they modify xxhash behavior. They are all disabled by default.
 
+- `XXH_INLINE_ALL` : Make all functions `inline`, with bodies directly included within `xxhash.h`.
+                     There is no need for an `xxhash.o` module in this case.
+                     Inlining functions is generally beneficial for speed on small keys.
+                     It's especially effective when key length is a compile time constant,
+                     with observed performance improvement in the +200% range.
+- `XXH_PRIVATE_API` : same as `XXH_INLINE_ALL`.
+                      name insists on the fact that the symbols will not be published on the library's public interface.
 - `XXH_ACCEPT_NULL_INPUT_POINTER` : if set to `1`, when input is a null-pointer,
                                     xxhash result is the same as a null-length key,
                                     instead of a dereference segfault.
@@ -79,14 +86,12 @@ they modify xxhash behavior. They are all disabled by default.
                               Breaks consistency with little-endian results.
 - `XXH_NAMESPACE` : prefix all symbols with the value of `XXH_NAMESPACE`.
                     Useful to evade symbol naming collisions,
-                    in case of multiple inclusions of xxHash library.
-                    Client programs can still use regular function name, symbols are automatically translated through `xxhash.h`.
+                    in case of multiple inclusions of xxHash source code.
+                    Client applications can still use regular function name, symbols are automatically translated through `xxhash.h`.
 - `XXH_STATIC_LINKING_ONLY` : gives access to state definition for static allocation.
                               Incompatible with dynamic linking, due to risks of ABI changes.
-- `XXH_PRIVATE_API` : Make all functions `static`, directly accessible through `#include xxhash.h`, for inlining.
-                      Do not compile `xxhash.c` as a separate module in this case.
 - `XXH_NO_LONG_LONG` : removes support for XXH64,
-                       useful for targets without 64-bit support.
+                       for targets without 64-bit support.
 
 
 ### Example
diff --git a/xxhash.h b/xxhash.h
index 4099da02..f96a871a 100644
--- a/xxhash.h
+++ b/xxhash.h
@@ -82,16 +82,17 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
 /* ****************************
 *  API modifier
 ******************************/
-/** XXH_PRIVATE_API
+/** XXH_INLINE_ALL (and XXH_PRIVATE_API)
 *   This is useful to include xxhash functions in `static` mode
 *   in order to inline them, and remove their symbol from the public list.
+*   Inlining can offer dramatic performance improvement on small keys.
 *   Methodology :
-*     #define XXH_PRIVATE_API
+*     #define XXH_INLINE_ALL
 *     #include "xxhash.h"
 *   `xxhash.c` is automatically included.
 *   It's not useful to compile and link it as a separate module.
 */
-#ifdef XXH_PRIVATE_API
+#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
 #  ifndef XXH_STATIC_LINKING_ONLY
 #    define XXH_STATIC_LINKING_ONLY
 #  endif
@@ -107,7 +108,7 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
 #  endif
 #else
 #  define XXH_PUBLIC_API   /* do nothing */
-#endif /* XXH_PRIVATE_API */
+#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
 
 /*!XXH_NAMESPACE, aka Namespace Emulation :
 
@@ -150,7 +151,7 @@ regular symbol name will be automatically translated by this header.
 ***************************************/
 #define XXH_VERSION_MAJOR    0
 #define XXH_VERSION_MINOR    6
-#define XXH_VERSION_RELEASE  4
+#define XXH_VERSION_RELEASE  5
 #define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
 XXH_PUBLIC_API unsigned XXH_versionNumber (void);
 
@@ -280,7 +281,7 @@ struct XXH64_state_s {
 };   /* typedef'd to XXH64_state_t */
 #endif
 
-#ifdef XXH_PRIVATE_API
+#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
 #  include "xxhash.c"   /* include xxhash function bodies as `static`, for inlining */
 #endif
 

From 77db5fca2570fca6a4bc6f563da7ed68a9d99935 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 13 Mar 2018 17:35:52 -0700
Subject: [PATCH 17/50] added xxHash specification

using markdown format.

created directory /doc
---
 doc/xxhash_spec.md | 311 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 311 insertions(+)
 create mode 100644 doc/xxhash_spec.md

diff --git a/doc/xxhash_spec.md b/doc/xxhash_spec.md
new file mode 100644
index 00000000..e673334b
--- /dev/null
+++ b/doc/xxhash_spec.md
@@ -0,0 +1,311 @@
+xxHash fast digest algorithm
+======================
+
+### Notices
+
+Copyright (c) Yann Collet
+
+Permission is granted to copy and distribute this document
+for any purpose and without charge,
+including translations into other languages
+and incorporation into compilations,
+provided that the copyright notice and this notice are preserved,
+and that any substantive changes or deletions from the original
+are clearly marked.
+Distribution of this document is unlimited.
+
+### Version
+
+0.1.0 (15/01/18)
+
+
+Table of Contents
+---------------------
+- [Introduction](#introduction)
+- [XXH32 algorithm description](#xxh32-algorithm-description)
+- [XXH64 algorithm description](#xxh64-algorithm-description)
+- [Performance considerations](#performance-considerations)
+- [Reference Implementation](#reference-implementation)
+
+
+Introduction
+----------------
+
+This document describes the xxHash digest algorithm, for both its 32-bit and 64-bit variants, named `XXH32` and `XXH64`. The algorithm takes as input a message of arbitrary length and an optional seed value, and produces a 32-bit or 64-bit output, called a "fingerprint" or "digest".
+
+xxHash is primarily designed for speed. It is labelled non-cryptographic, and is not meant to avoid intentional collisions (same digest for 2 different messages), or to prevent producing a message with predefined digest.
+
+XXH32 is designed to be fast on 32-bit machines.
+XXH64 is designed to be fast on 64-bit machines.
+Both variants produce different output.
+However, a given variant shall produce exactly the same output, irrespective of the cpu / os used. In particular, the result remains identical whatever the endianness and width of the cpu.
+
+### Operation notations
+
+All operations are performed modulo {32,64} bits. Arithmetic overflows are expected.
+`XXH32` uses 32-bit modular operations. `XXH64` uses 64-bit modular operations.
+
+- `+` : denote modular addition
+- `*` : denote modular multiplication
+- `X <<< s` : denote the value obtained by circularly shifting (rotating) `X` left by `s` bit positions.  
+- `X >> s` : denote the value obtained by shifting `X` right by s bit positions. Upper `s` bits become `0`.  
+- `X xor Y` : denote the bit-wise XOR of `X` and `Y` (same width).
+
+
+XXH32 Algorithm Description
+-------------------------------------
+
+### Overview
+
+We begin by supposing that we have a message of any length `L` as input, and that we wish to find its digest. Here `L` is an arbitrary nonnegative integer; `L` may be zero. The following steps are performed to compute the digest of the message.
+
+The algorithm collects and transforms input in _stripes_ of 16 bytes. The transforms are stored inside 4 "accumulators", each one storing an unsigned 32-bit value. Each accumulator can be processed independently in parallel, speeding up processing for cpu with multiple execution units.
+
+The algorithm uses 32-bit addition, multiplication, rotate, shift and xor operations. Many operations require some 32-bit prime number constants, all defined below :
+
+    static const u32 PRIME32_1 = 2654435761U;
+    static const u32 PRIME32_2 = 2246822519U;
+    static const u32 PRIME32_3 = 3266489917U;
+    static const u32 PRIME32_4 =  668265263U;
+    static const u32 PRIME32_5 =  374761393U;
+
+### Step 1. Initialise internal accumulators
+
+Each accumulator gets an initial value based on optional `seed` input. Since the `seed` is optional, it can be `0`.
+
+        u32 acc1 = seed + PRIME32_1 + PRIME32_2;
+        u32 acc2 = seed + PRIME32_2;
+        u32 acc3 = seed + 0;
+        u32 acc4 = seed - PRIME32_1;
+
+#### Special case : input is less than 16 bytes
+
+When input is too small (< 16 bytes), the algorithm will not process any stripe. Consequently, it will not make use of parallel accumulators.
+
+In which case, a simplified initialization is performed, using a single accumulator :
+
+      u32 acc  = seed + PRIME32_5;
+
+The algorithm then proceeds directly to step 4.
+
+### Step 2. Process stripes
+
+A stripe is a contiguous segment of 16 bytes.
+It is evenly divided into 4 _lanes_, of 4 bytes each.
+The first lane is used to update accumulator 1, the second lane is used to update accumulator 2, and so on.
+
+Each lane reads its associated 32-bit value using __little-endian__ convention.
+
+For each {lane, accumulator}, the update process is called a _round_, and applies the following formula :
+
+    accN = accN + (laneN * PRIME32_2);
+    accN = accN <<< 13;
+    accN = accN * PRIME32_1;
+
+This shuffles the bits so that any bit from input _lane_ impacts several bits in output _accumulator_. All operations are performed modulo 2^32.
+
+Input is consumed one full stripe at a time. Step 2 is looped as many times as necessary to consume the whole input, except the last remaining bytes which cannot form a stripe (< 16 bytes).
+When that happens, move to step 3.
+
+### Step 3. Accumulator convergence
+
+All 4 lane accumulators from previous steps are merged to produce a single remaining accumulator of same width (32-bit). The associated formula is as follows :
+
+    acc = (acc1 <<< 1) + (acc2 <<< 7) + (acc3 <<< 12) + (acc4 <<< 18);
+
+### Step 4. Add input length
+
+The input total length is presumed known at this stage. This step is just about adding the length to the accumulator, so that it participates in the final mixing.
+
+    acc = acc + (u32)inputLength;
+
+Note that, if input length is so large that it requires more than 32 bits, only the lower 32 bits are added to the accumulator.
+
+### Step 5. Consume remaining input
+
+There may be up to 15 bytes remaining to consume from the input.
+The final stage will digest them according to following pseudo-code :
+
+    while (remainingLength >= 4) {
+        lane = read_32bit_little_endian(input_ptr);
+        acc = acc + lane * PRIME32_3;
+        acc = (acc <<< 17) * PRIME32_4;
+        input_ptr += 4; remainingLength -= 4;
+    }
+
+    while (remainingLength >= 1) {
+        lane = read_byte(input_ptr);
+        acc = acc + lane * PRIME32_5;
+        acc = (acc <<< 11) * PRIME32_1;
+        input_ptr += 1; remainingLength -= 1;
+    }
+
+This process ensures that all input bytes are present in the final mix.
+
+### Step 6. Final mix (avalanche)
+
+The final mix ensures that all input bits have a chance to impact any bit in the output digest, resulting in an unbiased distribution. This is also called avalanche effect.
+
+    acc = acc xor (acc >> 15);
+    acc = acc * PRIME32_2;
+    acc = acc xor (acc >> 13);
+    acc = acc * PRIME32_3;
+    acc = acc xor (acc >> 16);
+
+### Step 7. Output
+
+The `XXH32()` function produces an unsigned 32-bit value as output.
+
+For systems which require to store and/or display the result in binary or hexadecimal format, the canonical format is defined to reproduce the same value as the natural decimal format, hence follows __big-endian__ convention (most significant byte first).
+
+
+XXH64 Algorithm Description
+-------------------------------------
+
+### Overview
+
+The `XXH64` algorithm structure is very similar to that of `XXH32`. The major difference is that `XXH64` uses 64-bit arithmetic, speeding up memory transfer for 64-bit compliant systems, but also relying on cpu capability to efficiently perform 64-bit operations.
+
+The algorithm collects and transforms input in _stripes_ of 32 bytes. The transforms are stored inside 4 "accumulators", each one storing an unsigned 64-bit value. Each accumulator can be processed independently in parallel, speeding up processing for CPUs with multiple execution units.
+
+The algorithm uses 64-bit addition, multiplication, rotate, shift and xor operations. Many operations require some 64-bit prime number constants, all defined below :
+
+    static const u64 PRIME64_1 = 11400714785074694791ULL;
+    static const u64 PRIME64_2 = 14029467366897019727ULL;
+    static const u64 PRIME64_3 =  1609587929392839161ULL;
+    static const u64 PRIME64_4 =  9650029242287828579ULL;
+    static const u64 PRIME64_5 =  2870177450012600261ULL;
+
+### Step 1. Initialise internal accumulators
+
+Each accumulator gets an initial value based on optional `seed` input. Since the `seed` is optional, it can be `0`.
+
+        u64 acc1 = seed + PRIME64_1 + PRIME64_2;
+        u64 acc2 = seed + PRIME64_2;
+        u64 acc3 = seed + 0;
+        u64 acc4 = seed - PRIME64_1;
+
+#### Special case : input is less than 32 bytes
+
+When input is too small (< 32 bytes), the algorithm will not process any stripe. Consequently, it will not make use of parallel accumulators.
+
+In that case, a simplified initialization is performed, using a single accumulator :
+
+      u64 acc  = seed + PRIME64_5;
+
+The algorithm then proceeds directly to step 4.
+
+### Step 2. Process stripes
+
+A stripe is a contiguous segment of 32 bytes.
+It is evenly divided into 4 _lanes_, of 8 bytes each.
+The first lane is used to update accumulator 1, the second lane is used to update accumulator 2, and so on.
+
+Each lane reads its associated 64-bit value using __little-endian__ convention.
+
+For each {lane, accumulator}, the update process is called a _round_, and applies the following formula :
+
+    round(accN,laneN):
+    accN = accN + (laneN * PRIME64_2);
+    accN = accN <<< 31;
+    return accN * PRIME64_1;
+
+This shuffles the bits so that any bit from input _lane_ impacts several bits in output _accumulator_. All operations are performed modulo 2^64.
+
+Input is consumed one full stripe at a time. Step 2 is looped as many times as necessary to consume the whole input, except the last remaining bytes which cannot form a stripe (< 32 bytes).
+When that happens, move to step 3.
+
+### Step 3. Accumulator convergence
+
+All 4 lane accumulators from the previous steps are merged to produce a single remaining accumulator of the same width (64-bit). The associated formula is as follows.
+
+Note that accumulator convergence is more complex than in the 32-bit variant, and requires defining another function called _mergeAccumulator()_ :
+
+    mergeAccumulator(acc,accN):
+    acc  = acc xor round(0, accN);
+    acc  = acc * PRIME64_1;
+    return acc + PRIME64_4;
+
+which is then used in the convergence formula :
+
+    acc = (acc1 <<< 1) + (acc2 <<< 7) + (acc3 <<< 12) + (acc4 <<< 18);
+    acc = mergeAccumulator(acc, acc1);
+    acc = mergeAccumulator(acc, acc2);
+    acc = mergeAccumulator(acc, acc3);
+    acc = mergeAccumulator(acc, acc4);
+
+### Step 4. Add input length
+
+The total input length is presumed known at this stage. This step simply adds the length to the accumulator, so that it participates in the final mixing.
+
+    acc = acc + inputLength;
+
+### Step 5. Consume remaining input
+
+There may be up to 31 bytes remaining to consume from the input.
+The final stage will digest them according to the following pseudo-code :
+
+    while (remainingLength >= 8) {
+        lane = read_64bit_little_endian(input_ptr);
+        acc = acc xor round(0, lane);
+        acc = (acc <<< 27) * PRIME64_1;
+        acc = acc + PRIME64_4;
+        input_ptr += 8; remainingLength -= 8;
+    }
+
+    if (remainingLength >= 4) {
+        lane = read_32bit_little_endian(input_ptr);
+        acc = acc xor (lane * PRIME64_1);
+        acc = (acc <<< 23) * PRIME64_2;
+        acc = acc + PRIME64_3;
+        input_ptr += 4; remainingLength -= 4;
+    }
+
+    while (remainingLength >= 1) {
+        lane = read_byte(input_ptr);
+        acc = acc xor (lane * PRIME64_5);
+        acc = (acc <<< 11) * PRIME64_1;
+        input_ptr += 1; remainingLength -= 1;
+    }
+
+This process ensures that all input bytes are present in the final mix.
+
+### Step 6. Final mix (avalanche)
+
+The final mix ensures that all input bits have a chance to impact any bit in the output digest, resulting in an unbiased distribution. This is also called the avalanche effect.
+
+    acc = acc xor (acc >> 33);
+    acc = acc * PRIME64_2;
+    acc = acc xor (acc >> 29);
+    acc = acc * PRIME64_3;
+    acc = acc xor (acc >> 32);
+
+### Step 7. Output
+
+The `XXH64()` function produces an unsigned 64-bit value as output.
+
+For systems that need to store and/or display the result in binary or hexadecimal format, the canonical format is defined to reproduce the same value as the natural decimal format, and hence follows the __big-endian__ convention (most significant byte first).
+
+Performance considerations
+----------------------------------
+
+The xxHash algorithms are simple and compact to implement. They provide a system independent "fingerprint" or digest of a message of arbitrary length.
+
+The algorithm allows input to be streamed and processed in multiple steps. In such a case, an internal buffer is needed to ensure data is presented to the algorithm in full stripes.
+
+On 64-bit systems, the 64-bit variant `XXH64` is generally faster to compute, so it is the recommended variant, even when only 32 bits are needed.
+
+On 32-bit systems though, positions are reversed : `XXH64` performance is reduced, due to its usage of 64-bit arithmetic. `XXH32` becomes the faster variant.
+
+
+Reference Implementation
+----------------------------------------
+
+A reference library written in C is available at http://www.xxhash.com .
+The web page also links to multiple other implementations written in many different languages.
+It links to the [github project page](https://github.com/Cyan4973/xxHash) where an [issue board](https://github.com/Cyan4973/xxHash/issues) can be used for further public discussions on the topic.
+
+
+Version changes
+--------------------
+v0.1.0 : initial release

From 1c8812006860ab985a6a13eece1f2b07295a16f5 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 13 Mar 2018 17:38:25 -0700
Subject: [PATCH 18/50] minor readme correction

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 68b4a9a1..4999b66d 100644
--- a/README.md
+++ b/README.md
@@ -71,8 +71,6 @@ they modify xxhash behavior. They are all disabled by default.
                      Inlining functions is generally beneficial for speed on small keys.
                      It's especially effective when key length is a compile time constant,
                      with observed performance improvement in the +200% range .
-- `XXH_PRIVATE_API` : same as `XXH_INLINE_ALL`.
-                      name insists on the fact the symbols will not published on library public interface.
 - `XXH_ACCEPT_NULL_INPUT_POINTER` : if set to `1`, when input is a null-pointer,
                                     xxhash result is the same as a null-length key,
                                     instead of a dereference segfault.
@@ -84,6 +82,8 @@ they modify xxhash behavior. They are all disabled by default.
                             Setting it to 0 forces big-endian.
 - `XXH_FORCE_NATIVE_FORMAT` : on big-endian systems : use native number representation.
                               Breaks consistency with little-endian results.
+- `XXH_PRIVATE_API` : same impact as `XXH_INLINE_ALL`.
+                      Name underlines that symbols will not be published on library public interface.
 - `XXH_NAMESPACE` : prefix all symbols with the value of `XXH_NAMESPACE`.
                     Useful to evade symbol naming collisions,
                     in case of multiple inclusions of xxHash source code.

From d88fe9010d0930271d21c02eccf22c416e62ed71 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Wed, 14 Mar 2018 23:52:37 -0700
Subject: [PATCH 19/50] README: minor update

link to article
---
 README.md | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 4999b66d..30318a9f 100644
--- a/README.md
+++ b/README.md
@@ -71,9 +71,10 @@ they modify xxhash behavior. They are all disabled by default.
                      Inlining functions is generally beneficial for speed on small keys.
                      It's especially effective when key length is a compile time constant,
                      with observed performance improvement in the +200% range .
+                     See [this article](https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html) for details.
 - `XXH_ACCEPT_NULL_INPUT_POINTER` : if set to `1`, when input is a null-pointer,
-                                    xxhash result is the same as a null-length key,
-                                    instead of a dereference segfault.
+                                    xxhash result is the same as a zero-length key
+                                    (instead of a dereference segfault).
 - `XXH_FORCE_MEMORY_ACCESS` : default method `0` uses a portable `memcpy()` notation.
                               Method `1` uses a gcc-specific `packed` attribute, which can provide better performance for some targets.
                               Method `2` forces unaligned reads, which is not standard compliant, but might sometimes be the only way to extract better performance.
@@ -87,8 +88,9 @@ they modify xxhash behavior. They are all disabled by default.
 - `XXH_NAMESPACE` : prefix all symbols with the value of `XXH_NAMESPACE`.
                     Useful to evade symbol naming collisions,
                     in case of multiple inclusions of xxHash source code.
-                    Client applications can still use regular function name, symbols are automatically translated through `xxhash.h`.
-- `XXH_STATIC_LINKING_ONLY` : gives access to state definition for static allocation.
+                    Client applications can still use regular function name,
+                    symbols are automatically translated through `xxhash.h`.
+- `XXH_STATIC_LINKING_ONLY` : gives access to state declaration for static allocation.
                               Incompatible with dynamic linking, due to risks of ABI changes.
 - `XXH_NO_LONG_LONG` : removes support for XXH64,
                        for targets without 64-bit support.

From b2fad1787dae389bddbbc577f3fbe245fbdf262b Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Sun, 18 Mar 2018 17:17:02 -0700
Subject: [PATCH 20/50] added trailingWhitespace test as make target

---
 Makefile                        | 4 ++++
 cmake_unofficial/CMakeLists.txt | 2 +-
 cmake_unofficial/README.md      | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 92220010..d6fdc610 100644
--- a/Makefile
+++ b/Makefile
@@ -212,6 +212,10 @@ test-all: clean all namespaceTest test test32 test-xxhsum-c clean-xxhsum-c \
 listL120:  # extract lines >= 120 characters in *.{c,h}, by Takayuki Matsuoka (note : $$, for Makefile compatibility)
 	find . -type f -name '*.c' -o -name '*.h' | while read -r filename; do awk 'length > 120 {print FILENAME "(" FNR "): " $$0}' $$filename; done
 
+.PHONY: trailingWhitespace
+trailingWhitespace:
+	! grep -E "`printf '[ \\t]$$'`" *.1 *.c *.h LICENSE Makefile cmake_unofficial/CMakeLists.txt
+
 .PHONY: clean
 clean: clean-xxhsum-c
 	@$(RM) core *.o libxxhash.*
diff --git a/cmake_unofficial/CMakeLists.txt b/cmake_unofficial/CMakeLists.txt
index 82b32ff4..4038a36b 100644
--- a/cmake_unofficial/CMakeLists.txt
+++ b/cmake_unofficial/CMakeLists.txt
@@ -3,7 +3,7 @@ cmake_policy(VERSION 2.6)
 
 project(xxhash)
 
-set(XXHASH_LIB_VERSION "0.6.3")
+set(XXHASH_LIB_VERSION "0.6.5")
 set(XXHASH_LIB_SOVERSION "0")
 
 option(BUILD_XXHSUM "Build the xxhsum binary" ON)
diff --git a/cmake_unofficial/README.md b/cmake_unofficial/README.md
index fb93042d..4fca58dd 100644
--- a/cmake_unofficial/README.md
+++ b/cmake_unofficial/README.md
@@ -3,4 +3,4 @@
 The `cmake` script present in this directory offers the following options :
 
 - `BUILD_XXHSUM` : build the command line binary. ON by default
-- `BUILD_SHARED_LIBS` : build a dynamic library. OFF by default, builds static library instead.
+- `BUILD_SHARED_LIBS` : build dynamic library. ON by default.

From 14705e3c61ad2c0ef5add8987bdcdcbfa7b35d78 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Sun, 18 Mar 2018 17:37:16 -0700
Subject: [PATCH 21/50] cmake: dynamic determination of library version

---
 cmake_unofficial/CMakeLists.txt | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/cmake_unofficial/CMakeLists.txt b/cmake_unofficial/CMakeLists.txt
index 4038a36b..40139da8 100644
--- a/cmake_unofficial/CMakeLists.txt
+++ b/cmake_unofficial/CMakeLists.txt
@@ -3,8 +3,18 @@ cmake_policy(VERSION 2.6)
 
 project(xxhash)
 
-set(XXHASH_LIB_VERSION "0.6.5")
-set(XXHASH_LIB_SOVERSION "0")
+set(XXHASH_DIR "${CMAKE_CURRENT_SOURCE_DIR}/..")
+
+file(STRINGS "${XXHASH_DIR}/xxhash.h" XXHASH_VERSION_MAJOR REGEX "^#define XXH_VERSION_MAJOR +([0-9]+) *$")
+string(REGEX REPLACE "^#define XXH_VERSION_MAJOR +([0-9]+) *$" "\\1" XXHASH_VERSION_MAJOR "${XXHASH_VERSION_MAJOR}")
+file(STRINGS "${XXHASH_DIR}/xxhash.h" XXHASH_VERSION_MINOR REGEX "^#define XXH_VERSION_MINOR +([0-9]+) *$")
+string(REGEX REPLACE "^#define XXH_VERSION_MINOR +([0-9]+) *$" "\\1" XXHASH_VERSION_MINOR "${XXHASH_VERSION_MINOR}")
+file(STRINGS "${XXHASH_DIR}/xxhash.h" XXHASH_VERSION_RELEASE REGEX "^#define XXH_VERSION_RELEASE +([0-9]+) *$")
+string(REGEX REPLACE "^#define XXH_VERSION_RELEASE +([0-9]+) *$" "\\1" XXHASH_VERSION_RELEASE "${XXHASH_VERSION_RELEASE}")
+set(XXHASH_VERSION_STRING "${XXHASH_VERSION_MAJOR}.${XXHASH_VERSION_MINOR}.${XXHASH_VERSION_RELEASE}")
+set(XXHASH_LIB_VERSION ${XXHASH_VERSION_STRING})
+set(XXHASH_LIB_SOVERSION "${XXHASH_VERSION_MAJOR}")
+mark_as_advanced(XXHASH_VERSION_MAJOR XXHASH_VERSION_MINOR XXHASH_VERSION_RELEASE XXHASH_VERSION_STRING XXHASH_LIB_VERSION XXHASH_LIB_SOVERSION)
 
 option(BUILD_XXHSUM "Build the xxhsum binary" ON)
 option(BUILD_SHARED_LIBS "Build shared library" ON)

From 530ade1f59084ecbbfa4b7731e0a683450a3c51e Mon Sep 17 00:00:00 2001
From: Evan Nemerson <evan@nemerson.com>
Date: Thu, 17 Nov 2016 15:10:39 -0800
Subject: [PATCH 22/50] cmake: rewrite based on LZ4's CMake support

---
 cmake_unofficial/CMakeLists.txt | 107 +++++++++++++++++++++++---------
 1 file changed, 78 insertions(+), 29 deletions(-)

diff --git a/cmake_unofficial/CMakeLists.txt b/cmake_unofficial/CMakeLists.txt
index 40139da8..1ca7a06d 100644
--- a/cmake_unofficial/CMakeLists.txt
+++ b/cmake_unofficial/CMakeLists.txt
@@ -1,7 +1,9 @@
-cmake_minimum_required(VERSION 2.6)
-cmake_policy(VERSION 2.6)
-
-project(xxhash)
+# To the extent possible under law, the author(s) have dedicated all
+# copyright and related and neighboring rights to this software to
+# the public domain worldwide. This software is distributed without
+# any warranty.
+#
+# For details, see <http://creativecommons.org/publicdomain/zero/1.0/>.
 
 set(XXHASH_DIR "${CMAKE_CURRENT_SOURCE_DIR}/..")
 
@@ -19,33 +21,80 @@ mark_as_advanced(XXHASH_VERSION_MAJOR XXHASH_VERSION_MINOR XXHASH_VERSION_RELEAS
 option(BUILD_XXHSUM "Build the xxhsum binary" ON)
 option(BUILD_SHARED_LIBS "Build shared library" ON)
 
-# Make CMake's RPATH handling not be insane. This suff has cmake set rpaths appropriately for
-# where things end up in the install tree. For some reason that's not the default:
-# https://cmake.org/Wiki/CMake_RPATH_handling
-SET(CMAKE_SKIP_BUILD_RPATH FALSE)
-SET(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
+if("${CMAKE_VERSION}" VERSION_LESS "3.0")
+  project(XXHASH C)
+else()
+  cmake_policy (SET CMP0048 NEW)
+  project(XXHASH
+    VERSION ${XXHASH_VERSION_STRING}
+    LANGUAGES C)
+endif()
+
+cmake_minimum_required (VERSION 2.8.12)
 
-# Where we search for shared libraries
-SET(CMAKE_INSTALL_RPATH "./lib")
+# If XXHASH is being bundled in another project, we don't want to
+# install anything.  However, we want to let people override this, so
+# we'll use the XXHASH_BUNDLED_MODE variable to let them do that; just
+# set it to OFF in your project before you add_subdirectory(xxhash/contrib/cmake_unofficial).
+if(CMAKE_CURRENT_SOURCE_DIR STREQUAL "${CMAKE_SOURCE_DIR}")
+  # Bundled mode hasn't been set one way or the other, set the default
+  # depending on whether or not we are the top-level project.
+  if("${XXHASH_PARENT_DIRECTORY}" STREQUAL "")
+    set(XXHASH_BUNDLED_MODE OFF)
+  else()
+    set(XXHASH_BUNDLED_MODE ON)
+  endif()
+endif()
+mark_as_advanced(XXHASH_BUNDLED_MODE)
 
-# add the automatically determined parts of the RPATH
-# which point to directories outside the build tree to the install RPATH
-SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+# Allow people to choose whether to build shared or static libraries
+# via the BUILD_SHARED_LIBS option unless we are in bundled mode, in
+# which case we always use static libraries.
+include(CMakeDependentOption)
+CMAKE_DEPENDENT_OPTION(BUILD_SHARED_LIBS "Build shared libraries" ON "NOT XXHASH_BUNDLED_MODE" OFF)
 
-add_library(xxhash ../xxhash.c)
-set_target_properties(xxhash PROPERTIES COMPILE_DEFINITIONS "XXHASH_EXPORT"
-                       VERSION "${XXHASH_LIB_VERSION}"
-                       SOVERSION "${XXHASH_LIB_SOVERSION}")
+include_directories("${XXHASH_DIR}")
 
-if (BUILD_XXHSUM)
-    add_executable(xxhsum ../xxhsum.c)
-    target_link_libraries(xxhsum xxhash)
-endif()
+# libxxhash
+add_library(xxhash "${XXHASH_DIR}/xxhash.c")
+set_target_properties(xxhash PROPERTIES
+  SOVERSION "${XXHASH_VERSION_STRING}"
+  VERSION "${XXHASH_VERSION_STRING}")
+
+# xxhsum
+add_executable(xxhsum "${XXHASH_DIR}/xxhsum.c")
+target_link_libraries(xxhsum xxhash)
+
+# Extra warning flags
+include (CheckCCompilerFlag)
+foreach (flag
+    -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow
+    -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement
+    -Wstrict-prototypes -Wundef)
+  # Because https://gcc.gnu.org/wiki/FAQ#wnowarning
+  string(REGEX REPLACE "\\-Wno\\-(.+)" "-W\\1" flag_to_test "${flag}")
+  string(REGEX REPLACE "[^a-zA-Z0-9]+" "_" test_name "CFLAG_${flag_to_test}")
+
+  check_c_compiler_flag("${ADD_COMPILER_FLAGS_PREPEND} ${flag_to_test}" ${test_name})
+
+  if(${test_name})
+    set(CMAKE_C_FLAGS "${flag} ${CMAKE_C_FLAGS}")
+  endif()
+
+  unset(test_name)
+  unset(flag_to_test)
+endforeach (flag)
+
+if(NOT XXHASH_BUNDLED_MODE)
+  include(GNUInstallDirs)
 
-INSTALL(FILES ../xxhash.h DESTINATION include)
-INSTALL(
-    TARGETS xxhash xxhsum
-    RUNTIME DESTINATION bin
-    ARCHIVE DESTINATION lib
-    LIBRARY DESTINATION lib
-)
+  install(TARGETS xxhsum
+    RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}")
+  install(TARGETS xxhash
+    LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
+    ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}")
+  install(FILES "${XXHASH_DIR}/xxhash.h"
+    DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
+  install(FILES "${XXHASH_DIR}/xxhsum.1"
+    DESTINATION "${CMAKE_INSTALL_MANDIR}/man1")
+endif(NOT XXHASH_BUNDLED_MODE)

From 735e0e38e54c8778b6512921da7c50e8c12a134c Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Mar 2018 10:12:50 -0700
Subject: [PATCH 23/50] added appveyor script

for Windows CI tests
---
 appveyor.yml | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 appveyor.yml

diff --git a/appveyor.yml b/appveyor.yml
new file mode 100644
index 00000000..e2f8f7bd
--- /dev/null
+++ b/appveyor.yml
@@ -0,0 +1,73 @@
+version: 1.0.{build}
+environment:
+  matrix:
+  - COMPILER: "gcc"
+    PLATFORM: "mingw64"
+  - COMPILER: "gcc"
+    PLATFORM: "mingw32"
+  - COMPILER: "gcc"
+    PLATFORM: "clang"
+
+install:
+  - ECHO Installing %COMPILER% %PLATFORM% %CONFIGURATION%
+  - MKDIR bin
+  - if [%COMPILER%]==[gcc] SET PATH_ORIGINAL=%PATH%
+  - if [%COMPILER%]==[gcc] (
+      SET "PATH_MINGW32=c:\MinGW\bin;c:\MinGW\usr\bin" &&
+      SET "PATH_MINGW64=c:\msys64\mingw64\bin;c:\msys64\usr\bin" &&
+      COPY C:\MinGW\bin\mingw32-make.exe C:\MinGW\bin\make.exe &&
+      COPY C:\MinGW\bin\gcc.exe C:\MinGW\bin\cc.exe
+    ) else (
+      IF [%PLATFORM%]==[x64] (SET ADDITIONALPARAM=/p:LibraryPath="C:\Program Files\Microsoft SDKs\Windows\v7.1\lib\x64;c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\lib\amd64;C:\Program Files (x86)\Microsoft Visual Studio 10.0\;C:\Program Files (x86)\Microsoft Visual Studio 10.0\lib\amd64;")
+    )
+
+build_script:
+  - if [%PLATFORM%]==[mingw32] SET PATH=%PATH_MINGW32%;%PATH_ORIGINAL%
+  - if [%PLATFORM%]==[mingw64] SET PATH=%PATH_MINGW64%;%PATH_ORIGINAL%
+  - if [%PLATFORM%]==[clang] SET PATH=%PATH_MINGW64%;%PATH_ORIGINAL%
+  - ECHO *** &&
+      ECHO Building %COMPILER% %PLATFORM% %CONFIGURATION% &&
+      ECHO ***
+  - if [%PLATFORM%]==[clang] (clang -v)
+  - if [%COMPILER%]==[gcc] (gcc -v)
+  - if [%COMPILER%]==[gcc] (
+      echo ----- &&
+      make -v &&
+      echo ----- &&
+      if not [%PLATFORM%]==[clang] (
+        make -B test-all
+      ) ELSE (
+        make -B test-all CC=clang MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion"
+      )
+    )
+  - if [%COMPILER%]==[visual] (
+# not useful now, kept here for reference in case we add Visual tests later on
+      ECHO *** &&
+      ECHO *** Building Visual Studio 2010 %PLATFORM%\%CONFIGURATION% &&
+      ECHO *** &&
+      msbuild "visual\VS2010\lz4.sln" %ADDITIONALPARAM% /m /verbosity:minimal /property:PlatformToolset=v100 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /p:EnableWholeProgramOptimization=true /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
+      ECHO *** &&
+      ECHO *** Building Visual Studio 2012 %PLATFORM%\%CONFIGURATION% &&
+      ECHO *** &&
+      msbuild "visual\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v110 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
+      ECHO *** &&
+      ECHO *** Building Visual Studio 2013 %PLATFORM%\%CONFIGURATION% &&
+      ECHO *** &&
+      msbuild "visual\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v120 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
+      ECHO *** &&
+      ECHO *** Building Visual Studio 2015 %PLATFORM%\%CONFIGURATION% &&
+      ECHO *** &&
+      msbuild "visual\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v140 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
+      COPY visual\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe programs\
+    )
+
+test_script:
+  - ECHO *** &&
+      ECHO Testing %COMPILER% %PLATFORM% %CONFIGURATION% &&
+      ECHO ***
+  - if not [%COMPILER%]==[unknown] (
+      xxhsum -h &&
+      xxhsum xxhsum.exe &&
+      xxhsum -bi1
+      echo ------- xxhsum tested ------- &&
+    )

From 73daa8c9b426d7d446f6756ed743a2fa1f320a94 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Mar 2018 10:14:00 -0700
Subject: [PATCH 24/50] changed comment

---
 appveyor.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/appveyor.yml b/appveyor.yml
index e2f8f7bd..513978ec 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -40,8 +40,7 @@ build_script:
         make -B test-all CC=clang MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion"
       )
     )
-  - if [%COMPILER%]==[visual] (
-# not useful now, kept here for reference in case we add Visual tests later on
+  - if [%COMPILER%]==[visual] (  # not useful now, kept here for reference in case we add Visual tests later on
       ECHO *** &&
       ECHO *** Building Visual Studio 2010 %PLATFORM%\%CONFIGURATION% &&
       ECHO *** &&

From 0ee84d378d04e3349937dd718f56bf34feef7f88 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Mar 2018 10:14:40 -0700
Subject: [PATCH 25/50] removed comment

---
 appveyor.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/appveyor.yml b/appveyor.yml
index 513978ec..3eb4502e 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -40,7 +40,7 @@ build_script:
         make -B test-all CC=clang MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion"
       )
     )
-  - if [%COMPILER%]==[visual] (  # not useful now, kept here for reference in case we add Visual tests later on
+  - if [%COMPILER%]==[visual] ( 
       ECHO *** &&
       ECHO *** Building Visual Studio 2010 %PLATFORM%\%CONFIGURATION% &&
       ECHO *** &&

From 5d41eb818482bc08ab05f7049313836b906b4bdc Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Mar 2018 10:19:05 -0700
Subject: [PATCH 26/50] added a return statement in an unreachable place

as mingw compiler complains without it (believes it could be reached).
---
 xxhash.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/xxhash.c b/xxhash.c
index f14d4b36..da06ea72 100644
--- a/xxhash.c
+++ b/xxhash.c
@@ -801,9 +801,10 @@ XXH64_finalize(U64 h64, const void* ptr, size_t len,
                     /* fallthrough */
       case  0: return XXH64_avalanche(h64);
     }
+
     /* impossible to reach */
     assert(0);
-
+    return 0;  /* unreachable, but some compilers complain without it */
 }
 
 FORCE_INLINE U64

From 2d967356a39b7467eba2127431e459182d609e1c Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Mar 2018 10:23:09 -0700
Subject: [PATCH 27/50] appveyor Windows tests : added -Werror flag

so that compilation warnings trigger an error
---
 appveyor.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/appveyor.yml b/appveyor.yml
index 3eb4502e..1ecbaa2a 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -35,12 +35,12 @@ build_script:
       make -v &&
       echo ----- &&
       if not [%PLATFORM%]==[clang] (
-        make -B test-all
+        MOREFLAGS=-Werror make -B test-all
       ) ELSE (
-        make -B test-all CC=clang MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion"
+        CC=clang MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion" make -B test-all
       )
     )
-  - if [%COMPILER%]==[visual] ( 
+  - if [%COMPILER%]==[visual] (
       ECHO *** &&
       ECHO *** Building Visual Studio 2010 %PLATFORM%\%CONFIGURATION% &&
       ECHO *** &&

From 0af67e60617fc75626eb796b9e19c1cb68acfc48 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Mar 2018 10:24:40 -0700
Subject: [PATCH 28/50] moved compilation variables after `make`

since Windows is not compatible with unix' environment variables
---
 appveyor.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/appveyor.yml b/appveyor.yml
index 1ecbaa2a..09e7310b 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -35,9 +35,9 @@ build_script:
       make -v &&
       echo ----- &&
       if not [%PLATFORM%]==[clang] (
-        MOREFLAGS=-Werror make -B test-all
+        make -B test-all MOREFLAGS=-Werror
       ) ELSE (
-        CC=clang MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion" make -B test-all
+        make -B test-all CC=clang MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion" 
       )
     )
   - if [%COMPILER%]==[visual] (

From 4af72306da20e9671b94452bb9436b982a2df0d2 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Mar 2018 10:34:08 -0700
Subject: [PATCH 29/50] removed mingw32 specific declaration

---
 xxhsum.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/xxhsum.c b/xxhsum.c
index 25f7bb85..ce837732 100644
--- a/xxhsum.c
+++ b/xxhsum.c
@@ -66,9 +66,6 @@
 #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
 #  include <fcntl.h>    /* _O_BINARY */
 #  include <io.h>       /* _setmode, _isatty */
-#  ifdef __MINGW32__
-   int _fileno(FILE *stream);   /* MINGW somehow forgets to include this windows declaration into <stdio.h> */
-#  endif
 #  define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY)
 #  define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
 #else
@@ -1181,7 +1178,7 @@ static unsigned readU32FromChar(const char** stringPtr)
 
 int main(int argc, const char** argv)
 {
-    int i, filenamesStart=0;
+    int i, filenamesStart = 0;
     const char* const exename = argv[0];
     U32 benchmarkMode = 0;
     U32 fileCheckMode = 0;
@@ -1191,7 +1188,7 @@ int main(int argc, const char** argv)
     U32 quiet         = 0;
     U32 specificTest  = 0;
     size_t keySize    = XXH_DEFAULT_SAMPLE_SIZE;
-    algoType algo = g_defaultAlgo;
+    algoType algo     = g_defaultAlgo;
     endianess displayEndianess = big_endian;
 
     /* special case : xxh32sum default to 32 bits checksum */

From c3557bcbfd774c29d6d2818805b524da6598f1b7 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Mar 2018 10:41:22 -0700
Subject: [PATCH 30/50] added target `make check`

`make test` does not contain platform-specific tests (32-bit, arm, etc.)

`make test-all` still includes them.
---
 Makefile     | 13 ++++++-------
 appveyor.yml |  4 ++--
 xxhsum.c     | 30 +++++++++++++++---------------
 3 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/Makefile b/Makefile
index d6fdc610..35be3140 100644
--- a/Makefile
+++ b/Makefile
@@ -116,8 +116,8 @@ lib: libxxhash.a libxxhash
 
 # tests
 
-.PHONY: test
-test: xxhsum
+.PHONY: check
+check: xxhsum
 	# stdin
 	./xxhsum < xxhash.c
 	# multiple files
@@ -159,8 +159,6 @@ test-xxhsum-c: xxhsum
 	# Expects "FAILED open or read"
 	echo "0000000000000000  test-expects-file-not-found" | ./xxhsum -c -; test $$? -eq 1
 	echo "00000000  test-expects-file-not-found" | ./xxhsum -c -; test $$? -eq 1
-
-clean-xxhsum-c:
 	@$(RM) -f .test.xxh32 .test.xxh64
 
 armtest: clean
@@ -205,8 +203,9 @@ clean-man:
 preview-man: clean-man man
 	man ./xxhsum.1
 
-test-all: clean all namespaceTest test test32 test-xxhsum-c clean-xxhsum-c \
-	armtest clangtest gpptest c90test test-mem usan staticAnalyze
+test: all namespaceTest check test-xxhsum-c c90test usan staticAnalyze
+
+test-all: test test32 armtest clangtest gpptest listL120 trailingWhitespace
 
 .PHONY: listL120
 listL120:  # extract lines >= 120 characters in *.{c,h}, by Takayuki Matsuoka (note : $$, for Makefile compatibility)
@@ -217,7 +216,7 @@ trailingWhitespace:
 	! grep -E "`printf '[ \\t]$$'`" *.1 *.c *.h LICENSE Makefile cmake_unofficial/CMakeLists.txt
 
 .PHONY: clean
-clean: clean-xxhsum-c
+clean:
 	@$(RM) core *.o libxxhash.*
 	@$(RM) xxhsum$(EXT) xxhsum32$(EXT) xxhsum_inlinedXXH$(EXT) xxh32sum xxh64sum
 	@echo cleaning completed
diff --git a/appveyor.yml b/appveyor.yml
index 09e7310b..df8c8429 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -35,9 +35,9 @@ build_script:
       make -v &&
       echo ----- &&
       if not [%PLATFORM%]==[clang] (
-        make -B test-all MOREFLAGS=-Werror
+        make -B clean test MOREFLAGS=-Werror
       ) ELSE (
-        make -B test-all CC=clang MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion" 
+        make -B clean test CC=clang MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion"
       )
     )
   - if [%COMPILER%]==[visual] (
diff --git a/xxhsum.c b/xxhsum.c
index ce837732..592d9f7d 100644
--- a/xxhsum.c
+++ b/xxhsum.c
@@ -32,8 +32,8 @@
 #define XXHASH_C_2097394837
 
 /* ************************************
-*  Compiler Options
-**************************************/
+ *  Compiler Options
+ **************************************/
 /* MS Visual */
 #if defined(_MSC_VER) || defined(_WIN32)
 #  define _CRT_SECURE_NO_WARNINGS   /* removes visual warnings */
@@ -46,8 +46,8 @@
 
 
 /* ************************************
-*  Includes
-**************************************/
+ *  Includes
+ **************************************/
 #include <stdlib.h>     /* malloc, calloc, free, exit */
 #include <stdio.h>      /* fprintf, fopen, ftello64, fread, stdin, stdout, _fileno (when present) */
 #include <string.h>     /* strcmp */
@@ -60,9 +60,9 @@
 #include "xxhash.h"
 
 
-/*-************************************
-*  OS-Specific Includes
-**************************************/
+/* ************************************
+ *  OS-Specific Includes
+ **************************************/
 #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
 #  include <fcntl.h>    /* _O_BINARY */
 #  include <io.h>       /* _setmode, _isatty */
@@ -108,8 +108,8 @@ static unsigned BMK_isLittleEndian(void)
 
 
 /* *************************************
-*  Constants
-***************************************/
+ *  Constants
+ ***************************************/
 #define LIB_VERSION XXH_VERSION_MAJOR.XXH_VERSION_MINOR.XXH_VERSION_RELEASE
 #define QUOTE(str) #str
 #define EXPAND_AND_QUOTE(str) QUOTE(str)
@@ -148,8 +148,8 @@ static const algoType g_defaultAlgo = algo_xxh64;    /* required within main() &
 
 
 /* ************************************
-*  Display macros
-**************************************/
+ *  Display macros
+ **************************************/
 #define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
 #define DISPLAYRESULT(...)   fprintf(stdout, __VA_ARGS__)
 #define DISPLAYLEVEL(l, ...) do { if (g_displayLevel>=l) DISPLAY(__VA_ARGS__); } while (0)
@@ -157,14 +157,14 @@ static int g_displayLevel = 2;
 
 
 /* ************************************
-*  Local variables
-**************************************/
+ *  Local variables
+ **************************************/
 static U32 g_nbIterations = NBLOOPS;
 
 
 /* ************************************
-*  Benchmark Functions
-**************************************/
+ *  Benchmark Functions
+ **************************************/
 static clock_t BMK_clockSpan( clock_t start )
 {
     return clock() - start;   /* works even if overflow; Typical max span ~ 30 mn */

From 343a471516845ee9fa914ebc72db7dca0cabbf22 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Mar 2018 10:52:31 -0700
Subject: [PATCH 31/50] removed `test32` from `all`

platform-specific
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 35be3140..7cfdbe70 100644
--- a/Makefile
+++ b/Makefile
@@ -79,7 +79,7 @@ LIBXXH = libxxhash.$(SHARED_EXT_VER)
 default: lib xxhsum_and_links
 
 .PHONY: all
-all: lib xxhsum xxhsum32 xxhsum_inlinedXXH
+all: lib xxhsum xxhsum_inlinedXXH
 
 xxhsum32: CFLAGS += -m32
 xxhsum xxhsum32: xxhash.c xxhsum.c

From 7d6a996d5d485c77b9108f7877fb8b040931a267 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Mar 2018 10:55:28 -0700
Subject: [PATCH 32/50] changed multi-file test

so that it doesn't grab directories
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 7cfdbe70..33dca5cb 100644
--- a/Makefile
+++ b/Makefile
@@ -121,7 +121,7 @@ check: xxhsum
 	# stdin
 	./xxhsum < xxhash.c
 	# multiple files
-	./xxhsum *
+	./xxhsum xxhash.* xxhsum.*
 	# internal bench
 	./xxhsum -bi1
 	# file bench

From 376ffba6ff1e916b6003816ccb050f527b79bafc Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Mar 2018 10:58:09 -0700
Subject: [PATCH 33/50] changed xxhsum multi-file test

so that it doesn't grab directories
---
 Makefile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index 33dca5cb..6420fc5e 100644
--- a/Makefile
+++ b/Makefile
@@ -141,11 +141,11 @@ test32: clean xxhsum32
 
 test-xxhsum-c: xxhsum
 	# xxhsum to/from pipe
-	./xxhsum * | ./xxhsum -c -
-	./xxhsum -H0 * | ./xxhsum -c -
+	./xxhsum xxhsum.* | ./xxhsum -c -
+	./xxhsum -H0 xxhsum.* | ./xxhsum -c -
 	# xxhsum to/from file, shell redirection
-	./xxhsum * > .test.xxh64
-	./xxhsum -H0 * > .test.xxh32
+	./xxhsum xxhsum.* > .test.xxh64
+	./xxhsum -H0 xxhsum.* > .test.xxh32
 	./xxhsum -c .test.xxh64
 	./xxhsum -c .test.xxh32
 	./xxhsum -c < .test.xxh64

From 1fc0c6e3eb8f30982c9145d29c917337ca77da03 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Mar 2018 11:00:48 -0700
Subject: [PATCH 34/50] moved usan tests to `test-all`

---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 6420fc5e..5a1d05d5 100644
--- a/Makefile
+++ b/Makefile
@@ -203,9 +203,9 @@ clean-man:
 preview-man: clean-man man
 	man ./xxhsum.1
 
-test: all namespaceTest check test-xxhsum-c c90test usan staticAnalyze
+test: all namespaceTest check test-xxhsum-c c90test staticAnalyze
 
-test-all: test test32 armtest clangtest gpptest listL120 trailingWhitespace
+test-all: test test32 armtest clangtest gpptest usan listL120 trailingWhitespace
 
 .PHONY: listL120
 listL120:  # extract lines >= 120 characters in *.{c,h}, by Takayuki Matsuoka (note : $$, for Makefile compatibility)

From b574a1561d4cacb4958a63d02e819b24d0313658 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Mar 2018 11:04:30 -0700
Subject: [PATCH 35/50] fix appveyor script

moved scan-build test to `test-all`
---
 Makefile     | 4 ++--
 appveyor.yml | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 5a1d05d5..79878ce5 100644
--- a/Makefile
+++ b/Makefile
@@ -203,9 +203,9 @@ clean-man:
 preview-man: clean-man man
 	man ./xxhsum.1
 
-test: all namespaceTest check test-xxhsum-c c90test staticAnalyze
+test: all namespaceTest check test-xxhsum-c c90test
 
-test-all: test test32 armtest clangtest gpptest usan listL120 trailingWhitespace
+test-all: test test32 armtest clangtest gpptest usan listL120 trailingWhitespace staticAnalyze
 
 .PHONY: listL120
 listL120:  # extract lines >= 120 characters in *.{c,h}, by Takayuki Matsuoka (note : $$, for Makefile compatibility)
diff --git a/appveyor.yml b/appveyor.yml
index df8c8429..2f26ab27 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -68,5 +68,5 @@ test_script:
       xxhsum -h &&
       xxhsum xxhsum.exe &&
       xxhsum -bi1
-      echo ------- xxhsum tested ------- &&
+      echo ------- xxhsum tested -------
     )

From 6cbce878e053c491e1dbcc0ed3036ef8936d694c Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Mar 2018 11:07:42 -0700
Subject: [PATCH 36/50] fixed appveyor test script

---
 appveyor.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/appveyor.yml b/appveyor.yml
index 2f26ab27..b95d5092 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -67,6 +67,6 @@ test_script:
   - if not [%COMPILER%]==[unknown] (
       xxhsum -h &&
       xxhsum xxhsum.exe &&
-      xxhsum -bi1
+      xxhsum -bi1 &&
       echo ------- xxhsum tested -------
     )

From 03be2d33470c26d2d65fd4a8ad64628031e505e0 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Mar 2018 11:13:30 -0700
Subject: [PATCH 37/50] libxxhash : -fPIC flag conditional (!Windows)

---
 Makefile | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 79878ce5..4e3047a9 100644
--- a/Makefile
+++ b/Makefile
@@ -101,7 +101,10 @@ libxxhash.a: xxhash.o
 	@echo compiling static library
 	@$(AR) $(ARFLAGS) $@ $^
 
-$(LIBXXH): LDFLAGS += -shared -fPIC
+$(LIBXXH): LDFLAGS += -shared
+ifneq (,$(filter Windows%,$(OS)))
+$(LIBXXH): LDFLAGS += -fPIC
+endif
 $(LIBXXH): xxhash.c
 	@echo compiling dynamic library $(LIBVER)
 	@$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@

From 6c6ffbb27f5a48096f10c13998f7ce9e810d1968 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Mar 2018 11:16:26 -0700
Subject: [PATCH 38/50] fixed Windows test

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 4e3047a9..26b8c13f 100644
--- a/Makefile
+++ b/Makefile
@@ -102,7 +102,7 @@ libxxhash.a: xxhash.o
 	@$(AR) $(ARFLAGS) $@ $^
 
 $(LIBXXH): LDFLAGS += -shared
-ifneq (,$(filter Windows%,$(OS)))
+ifeq (,$(filter Windows%,$(OS)))
 $(LIBXXH): LDFLAGS += -fPIC
 endif
 $(LIBXXH): xxhash.c

From fd7ea04f492776c8bf4a9f2e6f8057c317573c87 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Mar 2018 12:49:13 -0700
Subject: [PATCH 39/50] make clean : added removal of Mac OS-X specific bin
 directories

---
 Makefile | 1 +
 xxhsum.c | 7 ++++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 26b8c13f..bf0dfd7f 100644
--- a/Makefile
+++ b/Makefile
@@ -220,6 +220,7 @@ trailingWhitespace:
 
 .PHONY: clean
 clean:
+	@$(RM) -r *.dSYM   # Mac OS-X specific
 	@$(RM) core *.o libxxhash.*
 	@$(RM) xxhsum$(EXT) xxhsum32$(EXT) xxhsum_inlinedXXH$(EXT) xxh32sum xxh64sum
 	@echo cleaning completed
diff --git a/xxhsum.c b/xxhsum.c
index 592d9f7d..49b3df31 100644
--- a/xxhsum.c
+++ b/xxhsum.c
@@ -258,6 +258,7 @@ static void BMK_benchHash(hashFunction h, const char* hName, const void* buffer,
 /* BMK_benchMem():
  * specificTest : 0 == run all tests, 1+ run only specific test
  * buffer : is supposed 8-bytes aligned (if malloc'ed, it should be)
+ * the real allocated size of buffer is supposed to be >= (bufferSize+3).
  * @return : 0 on success, 1 if error (invalid mode selected) */
 static int BMK_benchMem(const void* buffer, size_t bufferSize, U32 specificTest)
 {
@@ -574,9 +575,9 @@ static int BMK_hash(const char* fileName,
         const char* const fileNameEnd = fileName + fileNameSize;
         const size_t maxInfoFilenameSize = fileNameSize > 30 ? 30 : fileNameSize;
         size_t infoFilenameSize = 1;
-        while ( (infoFilenameSize < maxInfoFilenameSize)
-              &&(fileNameEnd[-1-infoFilenameSize] != '/')
-              &&(fileNameEnd[-1-infoFilenameSize] != '\\') )
+        while ((infoFilenameSize < maxInfoFilenameSize)
+            && (fileNameEnd[-1-infoFilenameSize] != '/')
+            && (fileNameEnd[-1-infoFilenameSize] != '\\') )
               infoFilenameSize++;
         DISPLAY("\rLoading %s...  \r", fileNameEnd - infoFilenameSize);
 

From d0f432ca7ba2554f3d5fda8200cc71db4a9e1b5b Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Mar 2018 12:50:23 -0700
Subject: [PATCH 40/50] usan tests warnings will now fail

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index bf0dfd7f..6a8bb760 100644
--- a/Makefile
+++ b/Makefile
@@ -183,7 +183,7 @@ c90test: clean
 
 usan: clean
 	@echo ---- check undefined behavior - sanitize ----
-	$(MAKE) clean test CC=clang MOREFLAGS="-g -fsanitize=undefined"
+	$(MAKE) clean test CC=clang MOREFLAGS="-g -fsanitize=undefined -fno-sanitize-recover=all"
 
 staticAnalyze: clean
 	@echo ---- static analyzer - scan-build ----

From c7d815c2ad3b4222e553beb77a087e0095d6fac9 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Mar 2018 12:51:36 -0700
Subject: [PATCH 41/50] fixed usan warning

---
 xxhsum.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/xxhsum.c b/xxhsum.c
index 49b3df31..1376a8b0 100644
--- a/xxhsum.c
+++ b/xxhsum.c
@@ -573,8 +573,8 @@ static int BMK_hash(const char* fileName,
     /* loading notification */
     {   const size_t fileNameSize = strlen(fileName);
         const char* const fileNameEnd = fileName + fileNameSize;
-        const size_t maxInfoFilenameSize = fileNameSize > 30 ? 30 : fileNameSize;
-        size_t infoFilenameSize = 1;
+        const int maxInfoFilenameSize = (int)(fileNameSize > 30 ? 30 : fileNameSize);
+        int infoFilenameSize = 1;
         while ((infoFilenameSize < maxInfoFilenameSize)
             && (fileNameEnd[-1-infoFilenameSize] != '/')
             && (fileNameEnd[-1-infoFilenameSize] != '\\') )

From dd43ae210049646ebefddef855cd8c9a05631603 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Tue, 20 Mar 2018 12:53:25 -0700
Subject: [PATCH 42/50] usan tests can accept externally defined compiler

using CC=
---
 Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 6a8bb760..b7d91509 100644
--- a/Makefile
+++ b/Makefile
@@ -181,9 +181,10 @@ c90test: clean
 	$(CC) -std=c90 -Werror -pedantic -DXXH_NO_LONG_LONG -c xxhash.c
 	$(RM) xxhash.o
 
+usan: CC=clang
 usan: clean
 	@echo ---- check undefined behavior - sanitize ----
-	$(MAKE) clean test CC=clang MOREFLAGS="-g -fsanitize=undefined -fno-sanitize-recover=all"
+	$(MAKE) clean test CC=$(CC) MOREFLAGS="-g -fsanitize=undefined -fno-sanitize-recover=all"
 
 staticAnalyze: clean
 	@echo ---- static analyzer - scan-build ----

From 8762a1d5f43224c978b63cc2b779a41864165169 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Wed, 21 Mar 2018 06:16:16 -0700
Subject: [PATCH 43/50] synthetic benchmark : ensure buffer alignment by 8

since `malloc()` only provides alignment by 4 when compiling with mingw32.
---
 xxhsum.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/xxhsum.c b/xxhsum.c
index 1376a8b0..0c9e94df 100644
--- a/xxhsum.c
+++ b/xxhsum.c
@@ -347,6 +347,7 @@ static int BMK_benchFiles(const char** fileNamesTable, int nbFiles, U32 specific
 static int BMK_benchInternal(size_t keySize, int specificTest)
 {
     void* const buffer = calloc(keySize+3, 1);
+    void* const alignedBuffer = ((char*)buffer+15) - (((size_t)((char*)buffer+15)) & 0xF);  /* align on next 16 bytes */
     if(!buffer) {
         DISPLAY("\nError: not enough memory!\n");
         return 12;
@@ -361,7 +362,7 @@ static int BMK_benchInternal(size_t keySize, int specificTest)
     }
     DISPLAYLEVEL(1, "...        \n");
 
-    {   int const result = BMK_benchMem(buffer, keySize, specificTest);
+    {   int const result = BMK_benchMem(alignedBuffer, keySize, specificTest);
         free(buffer);
         return result;
     }

From 47128513a92bc30e8e765a0718453418289dca2b Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Wed, 21 Mar 2018 06:18:58 -0700
Subject: [PATCH 44/50] enlarge initial allocation

to be able to move start forward to an aligned position.
---
 xxhsum.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xxhsum.c b/xxhsum.c
index 0c9e94df..69931f72 100644
--- a/xxhsum.c
+++ b/xxhsum.c
@@ -346,7 +346,7 @@ static int BMK_benchFiles(const char** fileNamesTable, int nbFiles, U32 specific
 
 static int BMK_benchInternal(size_t keySize, int specificTest)
 {
-    void* const buffer = calloc(keySize+3, 1);
+    void* const buffer = calloc(keySize+16+3, 1);
     void* const alignedBuffer = ((char*)buffer+15) - (((size_t)((char*)buffer+15)) & 0xF);  /* align on next 16 bytes */
     if(!buffer) {
         DISPLAY("\nError: not enough memory!\n");

From d4f3fffd4a3d268c318af1c38f90ad402707fade Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Wed, 21 Mar 2018 09:56:25 -0700
Subject: [PATCH 45/50] appveyor: removed clang test

clang on Windows apparently defines _MSC_VER (?),
introducing Windows-specific keywords
which are not compatible with the `-std=c90` test.
---
 appveyor.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/appveyor.yml b/appveyor.yml
index b95d5092..aa712227 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -5,8 +5,6 @@ environment:
     PLATFORM: "mingw64"
   - COMPILER: "gcc"
     PLATFORM: "mingw32"
-  - COMPILER: "gcc"
-    PLATFORM: "clang"
 
 install:
   - ECHO Installing %COMPILER% %PLATFORM% %CONFIGURATION%

From 76f7ea37821a45c38d3b573082a32f105efffa96 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Wed, 21 Mar 2018 18:24:19 -0700
Subject: [PATCH 46/50] use stdint.h inside xxhash.h

to avoid type mismatch in some specific circumstances
---
 xxhash.h | 50 ++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 42 insertions(+), 8 deletions(-)

diff --git a/xxhash.h b/xxhash.h
index f96a871a..08f27e88 100644
--- a/xxhash.h
+++ b/xxhash.h
@@ -243,6 +243,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
 #endif  /* XXH_NO_LONG_LONG */
 
 
+
 #ifdef XXH_STATIC_LINKING_ONLY
 
 /* ================================================================================================
@@ -253,8 +254,38 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
 =================================================================================================== */
 
 /* These definitions are only meant to make possible
-   static allocation of XXH state, on stack or in a struct for example.
-   Never use members directly. */
+ * static allocation of XXH state, on stack or in a struct for example.
+ * Never **ever** use members directly. */
+
+#if !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+#   include <stdint.h>
+
+struct XXH32_state_s {
+   uint32_t total_len_32;
+   uint32_t large_len;
+   uint32_t v1;
+   uint32_t v2;
+   uint32_t v3;
+   uint32_t v4;
+   uint32_t mem32[4];
+   uint32_t memsize;
+   uint32_t reserved;   /* never read nor write, might be removed in a future version */
+};   /* typedef'd to XXH32_state_t */
+
+struct XXH64_state_s {
+   uint64_t total_len;
+   uint64_t v1;
+   uint64_t v2;
+   uint64_t v3;
+   uint64_t v4;
+   uint64_t mem64[4];
+   uint32_t memsize;
+   uint32_t reserved[2];          /* never read nor write, might be removed in a future version */
+};   /* typedef'd to XXH64_state_t */
+
+# else
 
 struct XXH32_state_s {
    unsigned total_len_32;
@@ -263,23 +294,26 @@ struct XXH32_state_s {
    unsigned v2;
    unsigned v3;
    unsigned v4;
-   unsigned mem32[4];   /* buffer defined as U32 for alignment */
+   unsigned mem32[4];
    unsigned memsize;
-   unsigned reserved;   /* never read nor write, will be removed in a future version */
+   unsigned reserved;   /* never read nor write, might be removed in a future version */
 };   /* typedef'd to XXH32_state_t */
 
-#ifndef XXH_NO_LONG_LONG   /* remove 64-bit support */
+#   ifndef XXH_NO_LONG_LONG  /* remove 64-bit support */
 struct XXH64_state_s {
    unsigned long long total_len;
    unsigned long long v1;
    unsigned long long v2;
    unsigned long long v3;
    unsigned long long v4;
-   unsigned long long mem64[4];   /* buffer defined as U64 for alignment */
+   unsigned long long mem64[4];
    unsigned memsize;
-   unsigned reserved[2];          /* never read nor write, will be removed in a future version */
+   unsigned reserved[2];     /* never read nor write, might be removed in a future version */
 };   /* typedef'd to XXH64_state_t */
-#endif
+#    endif
+
+# endif
+
 
 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
 #  include "xxhash.c"   /* include xxhash function bodies as `static`, for inlining */

From 1a4376afa849464b3e03fc25e82c1c54fd7aa071 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Sun, 1 Apr 2018 15:15:19 -0700
Subject: [PATCH 47/50] attempt to add code coverage

---
 .travis.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 4adeb390..605ae7f8 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,6 @@
 language: c
 compiler: gcc
-script: make -B test-all
+script: CFLAGS=-coverage make -B test-all
 before_install:
   - sudo apt-get update  -qq
   - sudo apt-get install -qq gcc-arm-linux-gnueabi
@@ -9,3 +9,5 @@ before_install:
   - sudo apt-get install -qq gcc-multilib
   - sudo apt-get install -qq valgrind
 
+after_success:
+  - bash <(curl -s https://codecov.io/bash)

From 63c8575d094f1374231d59d3c82afd51d84dfbef Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Sun, 1 Apr 2018 15:57:02 -0700
Subject: [PATCH 48/50] hash lib* instead of xxhsum*

to avoid hashing xxhsum.gcda,
whose content changes while xxhsum is running
---
 Makefile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index b7d91509..3e19eb9b 100644
--- a/Makefile
+++ b/Makefile
@@ -144,11 +144,11 @@ test32: clean xxhsum32
 
 test-xxhsum-c: xxhsum
 	# xxhsum to/from pipe
-	./xxhsum xxhsum.* | ./xxhsum -c -
-	./xxhsum -H0 xxhsum.* | ./xxhsum -c -
+	./xxhsum lib* | ./xxhsum -c -
+	./xxhsum -H0 lib* | ./xxhsum -c -
 	# xxhsum to/from file, shell redirection
-	./xxhsum xxhsum.* > .test.xxh64
-	./xxhsum -H0 xxhsum.* > .test.xxh32
+	./xxhsum lib* > .test.xxh64
+	./xxhsum -H0 lib* > .test.xxh32
 	./xxhsum -c .test.xxh64
 	./xxhsum -c .test.xxh32
 	./xxhsum -c < .test.xxh64

From d4eea23960c5dcb58d824e9ceccd02b4aa43a231 Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Sun, 1 Apr 2018 19:43:36 -0700
Subject: [PATCH 49/50] removed valgrind

and removed code coverage,
which can only work correctly when compiling only once
(here, we have multiple binaries created)
---
 .travis.yml | 6 +-----
 Makefile    | 4 ++--
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 605ae7f8..895da855 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,13 +1,9 @@
 language: c
 compiler: gcc
-script: CFLAGS=-coverage make -B test-all
+script: make -B test-all
 before_install:
   - sudo apt-get update  -qq
   - sudo apt-get install -qq gcc-arm-linux-gnueabi
   - sudo apt-get install -qq clang
   - sudo apt-get install -qq g++-multilib
   - sudo apt-get install -qq gcc-multilib
-  - sudo apt-get install -qq valgrind
-
-after_success:
-  - bash <(curl -s https://codecov.io/bash)
diff --git a/Makefile b/Makefile
index 3e19eb9b..6dd738f2 100644
--- a/Makefile
+++ b/Makefile
@@ -134,8 +134,8 @@ check: xxhsum
 test-mem: xxhsum
 	# memory tests
 	valgrind --leak-check=yes --error-exitcode=1 ./xxhsum -bi1 xxhash.c
-	valgrind --leak-check=yes --error-exitcode=1 ./xxhsum -H0 xxhash.c
-	valgrind --leak-check=yes --error-exitcode=1 ./xxhsum -H1 xxhash.c
+	valgrind --leak-check=yes --error-exitcode=1 ./xxhsum -H0  xxhash.c
+	valgrind --leak-check=yes --error-exitcode=1 ./xxhsum -H1  xxhash.c
 
 .PHONY: test32
 test32: clean xxhsum32

From 3064d42e7d74b0921bdd1818395d9cb37bb8976a Mon Sep 17 00:00:00 2001
From: Yann Collet <cyan@fb.com>
Date: Wed, 18 Apr 2018 14:27:00 -0700
Subject: [PATCH 50/50] minor code comment editing

---
 xxhash.h | 93 ++++++++++++++++++++++++++++----------------------------
 1 file changed, 46 insertions(+), 47 deletions(-)

diff --git a/xxhash.h b/xxhash.h
index 08f27e88..d6bad943 100644
--- a/xxhash.h
+++ b/xxhash.h
@@ -80,18 +80,18 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
 
 
 /* ****************************
-*  API modifier
-******************************/
+ *  API modifier
+ ******************************/
 /** XXH_INLINE_ALL (and XXH_PRIVATE_API)
-*   This is useful to include xxhash functions in `static` mode
-*   in order to inline them, and remove their symbol from the public list.
-*   Inlining can offer dramatic performance improvement on small keys.
-*   Methodology :
-*     #define XXH_INLINE_ALL
-*     #include "xxhash.h"
-*   `xxhash.c` is automatically included.
-*   It's not useful to compile and link it as a separate module.
-*/
+ *  This is useful to include xxhash functions in `static` mode
+ *  in order to inline them, and remove their symbol from the public list.
+ *  Inlining can offer dramatic performance improvement on small keys.
+ *  Methodology :
+ *     #define XXH_INLINE_ALL
+ *     #include "xxhash.h"
+ * `xxhash.c` is automatically included.
+ *  It's not useful to compile and link it as a separate module.
+ */
 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
 #  ifndef XXH_STATIC_LINKING_ONLY
 #    define XXH_STATIC_LINKING_ONLY
@@ -110,17 +110,17 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
 #  define XXH_PUBLIC_API   /* do nothing */
 #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
 
-/*!XXH_NAMESPACE, aka Namespace Emulation :
-
-If you want to include _and expose_ xxHash functions from within your own library,
-but also want to avoid symbol collisions with other libraries which may also include xxHash,
-
-you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
-with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
-
-Note that no change is required within the calling program as long as it includes `xxhash.h` :
-regular symbol name will be automatically translated by this header.
-*/
+/*! XXH_NAMESPACE, aka Namespace Emulation :
+ *
+ * If you want to include _and expose_ xxHash functions from within your own library,
+ * but also want to avoid symbol collisions with other libraries which may also include xxHash,
+ *
+ * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
+ * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
+ *
+ * Note that no change is required within the calling program as long as it includes `xxhash.h` :
+ * regular symbol name will be automatically translated by this header.
+ */
 #ifdef XXH_NAMESPACE
 #  define XXH_CAT(A,B) A##B
 #  define XXH_NAME2(A,B) XXH_CAT(A,B)
@@ -179,26 +179,25 @@ XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void*
 XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
 
 /*
-These functions generate the xxHash of an input provided in multiple segments.
-Note that, for small input, they are slower than single-call functions, due to state management.
-For small input, prefer `XXH32()` and `XXH64()` .
-
-XXH state must first be allocated, using XXH*_createState() .
-
-Start a new hash by initializing state with a seed, using XXH*_reset().
-
-Then, feed the hash state by calling XXH*_update() as many times as necessary.
-Obviously, input must be allocated and read accessible.
-The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
-
-Finally, a hash value can be produced anytime, by using XXH*_digest().
-This function returns the nn-bits hash as an int or long long.
-
-It's still possible to continue inserting input into the hash state after a digest,
-and generate some new hashes later on, by calling again XXH*_digest().
-
-When done, free XXH state space if it was allocated dynamically.
-*/
+ * Streaming functions generate the xxHash of an input provided in multiple segments.
+ * Note that, for small input, they are slower than single-call functions, due to state management.
+ * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
+ *
+ * XXH state must first be allocated, using XXH*_createState() .
+ *
+ * Start a new hash by initializing state with a seed, using XXH*_reset().
+ *
+ * Then, feed the hash state by calling XXH*_update() as many times as necessary.
+ * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
+ *
+ * Finally, a hash value can be produced anytime, by using XXH*_digest().
+ * This function returns the nn-bits hash as an int or long long.
+ *
+ * It's still possible to continue inserting input into the hash state after a digest,
+ * and generate some new hashes later on, by calling again XXH*_digest().
+ *
+ * When done, free XXH state space if it was allocated dynamically.
+ */
 
 /*======   Canonical representation   ======*/
 
@@ -207,10 +206,10 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t
 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
 
 /* Default result type for XXH functions are primitive unsigned 32 and 64 bits.
-*  The canonical representation uses human-readable write convention, aka big-endian (large digits first).
-*  These functions allow transformation of hash result into and from its canonical format.
-*  This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
-*/
+ * The canonical representation uses human-readable write convention, aka big-endian (large digits first).
+ * These functions allow transformation of hash result into and from its canonical format.
+ * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
+ */
 
 
 #ifndef XXH_NO_LONG_LONG
@@ -253,7 +252,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
    Never use them in association with dynamic linking !
 =================================================================================================== */
 
-/* These definitions are only meant to make possible
+/* These definitions are only present to allow
  * static allocation of XXH state, on stack or in a struct for example.
  * Never **ever** use members directly. */