diff --git a/README.md b/README.md index bce91a49..7a1f06e9 100644 --- a/README.md +++ b/README.md @@ -25,12 +25,12 @@ To add a dependency on hash4j using Maven, use the following: com.dynatrace.hash4j hash4j - 0.13.0 + 0.14.0 ``` To add a dependency using Gradle: ```gradle -implementation 'com.dynatrace.hash4j:hash4j:0.13.0' +implementation 'com.dynatrace.hash4j:hash4j:0.14.0' ``` ## Hash algorithms @@ -193,11 +193,20 @@ HashValue128 hash = FileHashing.imohash1_0_2().hashFileTo128Bits(file); See also [FileHashingDemo.java](src/test/java/com/dynatrace/hash4j/file/FileHashingDemo.java). ## Consistent hashing -This library contains an implementation of [JumpHash](https://arxiv.org/abs/1406.2294) -that can be used to achieve distributed agreement when assigning hash values to a given number of buckets. -The hash values are distributed uniformly over the buckets. -The algorithm also minimizes the number of reassignments needed for balancing when the number of buckets changes. - +This library contains various algorithms for the distributed agreement on the assignment of hash values to a given number of buckets. +In the naive approach, the hash values are assigned to the buckets with the modulo operation according to +`bucketIdx = abs(hash) % numBuckets`. +If the number of buckets is changed, the bucket index will change for most hash values. +With a consistent hash algorithm, the above expression can be replaced by +`bucketIdx = consistentBucketHasher.getBucket(hash, numBuckets)` +to minimize the number of reassignments while still ensuring a fair distribution across all buckets. 
+ +The following consistent hashing algorithms are available: +* [JumpHash](https://arxiv.org/abs/1406.2294): This algorithm has a calculation time that scales logarithmically with the number of buckets +* [Improved Consistent Weighted Sampling](https://doi.org/10.1109/ICDM.2010.80): This algorithm is based on improved +consistent weighted sampling with a constant computation time independent of the number of buckets. This algorithm is faster than +JumpHash for large numbers of buckets. + ### Usage ```java // create a consistent bucket hasher diff --git a/build.gradle b/build.gradle index e2e61644..935b8574 100644 --- a/build.gradle +++ b/build.gradle @@ -68,7 +68,7 @@ java { } group = 'com.dynatrace.hash4j' -version = '0.13.0' +version = '0.14.0' spotless { ratchetFrom 'origin/main' diff --git a/src/jmh/java/com/dynatrace/hash4j/consistent/ConsistentJumpBucketHasherPerformanceTest.java b/src/jmh/java/com/dynatrace/hash4j/consistent/ConsistentJumpBucketHasherPerformanceTest.java new file mode 100644 index 00000000..cd780cc7 --- /dev/null +++ b/src/jmh/java/com/dynatrace/hash4j/consistent/ConsistentJumpBucketHasherPerformanceTest.java @@ -0,0 +1,49 @@ +/* + * Copyright 2023 Dynatrace LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.dynatrace.hash4j.consistent; + +import com.dynatrace.hash4j.random.PseudoRandomGeneratorProvider; +import java.util.SplittableRandom; +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; + +public class ConsistentJumpBucketHasherPerformanceTest { + + private static final ConsistentBucketHasher CONSISTENT_BUCKET_HASHER = + ConsistentHashing.jumpHash(PseudoRandomGeneratorProvider.splitMix64_V1()); + + @State(Scope.Thread) + public static class TestState { + + @Param({"1", "10", "100", "1000", "10000", "100000", "1000000"}) + int numBuckets; + + SplittableRandom random; + + @Setup + public void init() { + random = new SplittableRandom(0x87c5950e6677341eL); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + public void getBucket(TestState testState, Blackhole blackhole) { + int bucket = + CONSISTENT_BUCKET_HASHER.getBucket(testState.random.nextLong(), testState.numBuckets); + blackhole.consume(bucket); + } +} diff --git a/src/jmh/java/com/dynatrace/hash4j/consistent/ImprovedConsistentWeightedSamplingPerformanceTest.java b/src/jmh/java/com/dynatrace/hash4j/consistent/ImprovedConsistentWeightedSamplingPerformanceTest.java new file mode 100644 index 00000000..5046d9d7 --- /dev/null +++ b/src/jmh/java/com/dynatrace/hash4j/consistent/ImprovedConsistentWeightedSamplingPerformanceTest.java @@ -0,0 +1,50 @@ +/* + * Copyright 2023 Dynatrace LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.dynatrace.hash4j.consistent; + +import com.dynatrace.hash4j.random.PseudoRandomGeneratorProvider; +import java.util.SplittableRandom; +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; + +public class ImprovedConsistentWeightedSamplingPerformanceTest { + + private static final ConsistentBucketHasher CONSISTENT_BUCKET_HASHER = + ConsistentHashing.improvedConsistentWeightedSampling( + PseudoRandomGeneratorProvider.splitMix64_V1()); + + @State(Scope.Thread) + public static class TestState { + + @Param({"1", "10", "100", "1000", "10000", "100000", "1000000"}) + int numBuckets; + + SplittableRandom random; + + @Setup + public void init() { + random = new SplittableRandom(0x87c5950e6677341eL); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + public void getBucket(TestState testState, Blackhole blackhole) { + int bucket = + CONSISTENT_BUCKET_HASHER.getBucket(testState.random.nextLong(), testState.numBuckets); + blackhole.consume(bucket); + } +} diff --git a/src/jmh/java/com/dynatrace/hash4j/consistent/ModuloPerformanceTest.java b/src/jmh/java/com/dynatrace/hash4j/consistent/ModuloPerformanceTest.java new file mode 100644 index 00000000..b572d9d8 --- /dev/null +++ b/src/jmh/java/com/dynatrace/hash4j/consistent/ModuloPerformanceTest.java @@ -0,0 +1,44 @@ +/* + * Copyright 2023 Dynatrace LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.dynatrace.hash4j.consistent; + +import java.util.SplittableRandom; +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; + +public class ModuloPerformanceTest { + + @State(Scope.Thread) + public static class TestState { + + @Param({"1", "10", "100", "1000", "10000", "100000", "1000000"}) + int numBuckets; + + SplittableRandom random; + + @Setup + public void init() { + random = new SplittableRandom(0x87c5950e6677341eL); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + public void getBucket(TestState testState, Blackhole blackhole) { + int bucket = (int) ((testState.random.nextLong() & 0x7FFFFFFFFFFFFFFFL) % testState.numBuckets); + blackhole.consume(bucket); + } +} diff --git a/src/jmh/java/com/dynatrace/hash4j/consistent/RandomNumberPerformanceTest.java b/src/jmh/java/com/dynatrace/hash4j/consistent/RandomNumberPerformanceTest.java new file mode 100644 index 00000000..b7b36e15 --- /dev/null +++ b/src/jmh/java/com/dynatrace/hash4j/consistent/RandomNumberPerformanceTest.java @@ -0,0 +1,40 @@ +/* + * Copyright 2023 Dynatrace LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.dynatrace.hash4j.consistent; + +import java.util.SplittableRandom; +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; + +public class RandomNumberPerformanceTest { + + @State(Scope.Thread) + public static class TestState { + + SplittableRandom random; + + @Setup + public void init() { + random = new SplittableRandom(0x87c5950e6677341eL); + } + } + + @Benchmark + @BenchmarkMode(Mode.AverageTime) + public void getBucket(TestState testState, Blackhole blackhole) { + blackhole.consume(testState.random.nextLong()); + } +} diff --git a/src/main/java/com/dynatrace/hash4j/consistent/ConsistentHashing.java b/src/main/java/com/dynatrace/hash4j/consistent/ConsistentHashing.java index 5df51d1b..ee1a2620 100644 --- a/src/main/java/com/dynatrace/hash4j/consistent/ConsistentHashing.java +++ b/src/main/java/com/dynatrace/hash4j/consistent/ConsistentHashing.java @@ -29,6 +29,8 @@ private ConsistentHashing() {} * consistent hash algorithm." arXiv preprint arXiv:1406.2294 (2014). * + *

The average computation time depends logarithmically on the number of buckets. + * * @param pseudoRandomGeneratorProvider a {@link PseudoRandomGeneratorProvider} * @return a {@link ConsistentBucketHasher} */ @@ -36,4 +38,23 @@ public static ConsistentBucketHasher jumpHash( PseudoRandomGeneratorProvider pseudoRandomGeneratorProvider) { return new ConsistentJumpBucketHasher(pseudoRandomGeneratorProvider); } + + /** + * Returns a {@link ConsistentBucketHasher}. + * + *

This algorithm is based on the method described in Sergey Ioffe, "Improved Consistent + * Sampling, Weighted Minhash and L1 Sketching," 2010, doi: 10.1109/ICDM.2010.80, which is applied to a + * one-dimensional input vector whose value is equal to the number of buckets. + * + *

The computation time is constant, independent of the number of buckets. This method is + * faster than {@link #jumpHash(PseudoRandomGeneratorProvider)} for a large number of buckets. + * + * @param pseudoRandomGeneratorProvider a {@link PseudoRandomGeneratorProvider} + * @return a {@link ConsistentBucketHasher} + */ + public static ConsistentBucketHasher improvedConsistentWeightedSampling( + PseudoRandomGeneratorProvider pseudoRandomGeneratorProvider) { + return new ImprovedConsistentWeightedSampling(pseudoRandomGeneratorProvider); + } } diff --git a/src/main/java/com/dynatrace/hash4j/consistent/ConsistentJumpBucketHasher.java b/src/main/java/com/dynatrace/hash4j/consistent/ConsistentJumpBucketHasher.java index e63a3691..57140ab9 100644 --- a/src/main/java/com/dynatrace/hash4j/consistent/ConsistentJumpBucketHasher.java +++ b/src/main/java/com/dynatrace/hash4j/consistent/ConsistentJumpBucketHasher.java @@ -54,7 +54,7 @@ class ConsistentJumpBucketHasher implements ConsistentBucketHasher { // see // https://github.com/google/guava/blob/0a17f4a429323589396c38d8ce75ca058faa6c64/guava/src/com/google/common/hash/Hashing.java#L559 @Override - public int getBucket(long hash, int numBuckets) { + public strictfp int getBucket(long hash, int numBuckets) { checkArgument(numBuckets > 0, "buckets must be positive"); pseudoRandomGenerator.reset(hash); @@ -64,11 +64,10 @@ public int getBucket(long hash, int numBuckets) { // Jump from bucket to bucket until we go out of range while (true) { next = (int) ((candidate + 1) / pseudoRandomGenerator.nextDouble()); - if (next > candidate && next < numBuckets) { - candidate = next; - } else { - return candidate; - } + if (next >= numBuckets || next <= candidate) + return candidate; // second condition protects against infinite loops caused by bad random + // values such as NaN or values outside of [0,1) + candidate = next; } } } diff --git a/src/main/java/com/dynatrace/hash4j/consistent/ImprovedConsistentWeightedSampling.java 
b/src/main/java/com/dynatrace/hash4j/consistent/ImprovedConsistentWeightedSampling.java new file mode 100644 index 00000000..77bbea6d --- /dev/null +++ b/src/main/java/com/dynatrace/hash4j/consistent/ImprovedConsistentWeightedSampling.java @@ -0,0 +1,52 @@ +/* + * Copyright 2023 Dynatrace LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dynatrace.hash4j.consistent; + +import static com.dynatrace.hash4j.util.Preconditions.checkArgument; +import static java.util.Objects.requireNonNull; + +import com.dynatrace.hash4j.random.PseudoRandomGenerator; +import com.dynatrace.hash4j.random.PseudoRandomGeneratorProvider; + +/** + * Consistent hashing algorithm based on a simplified version of the algorithm described in Sergey + * Ioffe, "Improved Consistent + * Sampling, Weighted Minhash and L1 Sketching," 2010 IEEE International Conference on Data + * Mining, Sydney, NSW, Australia, 2010, pp. 246-255, doi: 10.1109/ICDM.2010.80. 
+ */ +class ImprovedConsistentWeightedSampling implements ConsistentBucketHasher { + + private final PseudoRandomGenerator pseudoRandomGenerator; + + ImprovedConsistentWeightedSampling(PseudoRandomGeneratorProvider pseudoRandomGeneratorProvider) { + requireNonNull(pseudoRandomGeneratorProvider); + this.pseudoRandomGenerator = pseudoRandomGeneratorProvider.create(); + } + + @Override + public strictfp int getBucket(long hash, int numBuckets) { + checkArgument(numBuckets > 0, "buckets must be positive"); + pseudoRandomGenerator.reset(hash); + double r = pseudoRandomGenerator.nextExponential() + pseudoRandomGenerator.nextExponential(); + double b = pseudoRandomGenerator.nextDouble(); + double t = StrictMath.floor(StrictMath.log(numBuckets) / r + b); + double y = StrictMath.exp(r * (t - b)); + // y should always be in the range [0, numBuckets), + // but could be larger due to numerical inaccuracies, + // therefore limit result after rounding down to numBuckets - 1 + return Math.min((int) y, numBuckets - 1); + } +} diff --git a/src/test/java/com/dynatrace/hash4j/consistent/AbstractConsistentBucketHasherTest.java b/src/test/java/com/dynatrace/hash4j/consistent/AbstractConsistentBucketHasherTest.java new file mode 100644 index 00000000..8704d4c3 --- /dev/null +++ b/src/test/java/com/dynatrace/hash4j/consistent/AbstractConsistentBucketHasherTest.java @@ -0,0 +1,202 @@ +/* + * Copyright 2023 Dynatrace LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.dynatrace.hash4j.consistent; + +import static org.assertj.core.api.Assertions.*; +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; + +import com.dynatrace.hash4j.hashing.HashStream64; +import com.dynatrace.hash4j.hashing.Hashing; +import com.dynatrace.hash4j.random.PseudoRandomGeneratorProvider; +import com.dynatrace.hash4j.random.PseudoRandomGeneratorProviderForTesting; +import java.util.Arrays; +import java.util.SplittableRandom; +import java.util.stream.IntStream; +import org.hipparchus.stat.inference.AlternativeHypothesis; +import org.hipparchus.stat.inference.BinomialTest; +import org.hipparchus.stat.inference.ChiSquareTest; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; + +abstract class AbstractConsistentBucketHasherTest { + + protected abstract ConsistentBucketHasher getConsistentBucketHasher( + PseudoRandomGeneratorProvider pseudoRandomGeneratorProvider); + + @Test + void testIllegalNumBuckets() { + ConsistentBucketHasher consistentBucketHasher = + getConsistentBucketHasher(PseudoRandomGeneratorProvider.splitMix64_V1()); + assertThatIllegalArgumentException().isThrownBy(() -> consistentBucketHasher.getBucket(0L, 0)); + assertThatIllegalArgumentException().isThrownBy(() -> consistentBucketHasher.getBucket(0L, -1)); + } + + @Test + void testNullPseudoRandomNumberGenerator() { + assertThatNullPointerException().isThrownBy(() -> getConsistentBucketHasher(null)); + } + + @ParameterizedTest + @MethodSource("getNumBuckets") + void testUniformDistribution(int numBuckets) { + double alpha = 0.0001; + int numCycles = 500000; + long seed = + Hashing.komihash5_0() + .hashStream() + .putLong(0x1c1e29a7c6f82fa8L) + .putInt(numBuckets) + .getAsLong(); + long[] counts = new long[numBuckets]; + double[] expected = new double[numBuckets]; + Arrays.fill(expected, 1.0); + 
ConsistentBucketHasher consistentBucketHasher = + getConsistentBucketHasher(PseudoRandomGeneratorProvider.splitMix64_V1()); + + SplittableRandom random = new SplittableRandom(seed); + for (int i = 0; i < numCycles; ++i) { + int bucketIdx = consistentBucketHasher.getBucket(random.nextLong(), numBuckets); + counts[bucketIdx] += 1; + } + + if (numBuckets >= 2) { + double pValue = new ChiSquareTest().chiSquareTest(expected, counts); + assertThat(pValue).isGreaterThan(alpha); + } + } + + private void testRedistribution(int numBuckets, int numCycles, long seed) { + ConsistentBucketHasher consistentBucketHasher = + getConsistentBucketHasher(PseudoRandomGeneratorProvider.splitMix64_V1()); + + SplittableRandom random = new SplittableRandom(seed); + for (int i = 0; i < numCycles; ++i) { + long hash = random.nextLong(); + int oldBucketIdx = consistentBucketHasher.getBucket(hash, numBuckets); + int newBucketIdx = consistentBucketHasher.getBucket(hash, numBuckets + 1); + if (oldBucketIdx != newBucketIdx) { + assertThat(newBucketIdx).isEqualTo(numBuckets); + } + } + } + + private static IntStream getNumBuckets() { + int maxNumBuckets = 200; + return IntStream.range(1, maxNumBuckets + 1); + } + + @ParameterizedTest + @MethodSource("getNumBuckets") + void testRedistribution(int numBuckets) { + int numCycles = 10000; + long seed = + Hashing.komihash5_0() + .hashStream() + .putLong(0x3df6dcebff42e20dL) + .putInt(numBuckets) + .getAsLong(); + testRedistribution(numBuckets, numCycles, seed); + } + + @Test + void testMaxNumBuckets() { + double alpha = 0.001; + + SplittableRandom random = new SplittableRandom(0x5cfb4dcb296c1921L); + + int numBuckets = Integer.MAX_VALUE; + int numTrials = 1000000; + + int numZero = 0; + int numEven = 0; + int numLower = 0; + + ConsistentBucketHasher consistentBucketHasher = + getConsistentBucketHasher(PseudoRandomGeneratorProvider.splitMix64_V1()); + + for (int i = 0; i < numTrials; ++i) { + int bucket = 
consistentBucketHasher.getBucket(random.nextLong(), numBuckets); + if (bucket == 0) { + numZero += 1; + } else { + if ((bucket & 1) == 0) { + numEven += 1; + } + if (bucket < numBuckets / 2) { + numLower += 1; + } + } + } + assertThat( + new BinomialTest() + .binomialTest(numTrials - numZero, numEven, 0.5, AlternativeHypothesis.TWO_SIDED)) + .isGreaterThan(alpha); + assertThat( + new BinomialTest() + .binomialTest(numTrials - numZero, numLower, 0.5, AlternativeHypothesis.TWO_SIDED)) + .isGreaterThan(alpha); + assertThat( + new BinomialTest() + .binomialTest(numTrials, numZero, 1. / numBuckets, AlternativeHypothesis.TWO_SIDED)) + .isGreaterThan(alpha); + } + + protected abstract long getCheckSum(); + + @Test + void testCheckSum() { + int numIterations = 1_000_000; + SplittableRandom random = new SplittableRandom(0x2df5ae93946a7653L); + ConsistentBucketHasher hasher = + getConsistentBucketHasher(PseudoRandomGeneratorProvider.splitMix64_V1()); + HashStream64 checkSumHashStream = Hashing.komihash5_0().hashStream(); + for (int i = 0; i < numIterations; ++i) { + int numBuckets = random.nextInt(Integer.MAX_VALUE); + long hash = random.nextLong(); + int bucketIdx = hasher.getBucket(hash, numBuckets); + checkSumHashStream.putInt(bucketIdx); + } + assertThat(checkSumHashStream.getAsLong()).isEqualTo(getCheckSum()); + } + + @ParameterizedTest + @ValueSource( + doubles = { + Double.NEGATIVE_INFINITY, + -Double.MAX_VALUE, + -2, + -1, + 0., + 1., + 2, + Double.MAX_VALUE, + Double.POSITIVE_INFINITY, + Double.NaN + }) + void testInvalidPseudoRandomGeneratorNextDouble(double randomValue) { + PseudoRandomGeneratorProviderForTesting pseudoRandomGeneratorProvider = + new PseudoRandomGeneratorProviderForTesting(); + + ConsistentBucketHasher consistentBucketHasher = + getConsistentBucketHasher(pseudoRandomGeneratorProvider); + + pseudoRandomGeneratorProvider.setDoubleValue(randomValue); + assertThatNoException() + .isThrownBy(() -> consistentBucketHasher.getBucket(0x82739fa8da9a7728L, 
10)); + } +} diff --git a/src/test/java/com/dynatrace/hash4j/consistent/ConsistentJumpBucketHasherTest.java b/src/test/java/com/dynatrace/hash4j/consistent/ConsistentJumpBucketHasherTest.java index 0eb9d3c1..b9216565 100644 --- a/src/test/java/com/dynatrace/hash4j/consistent/ConsistentJumpBucketHasherTest.java +++ b/src/test/java/com/dynatrace/hash4j/consistent/ConsistentJumpBucketHasherTest.java @@ -18,102 +18,17 @@ import static org.assertj.core.api.Assertions.*; import com.dynatrace.hash4j.random.PseudoRandomGeneratorProvider; -import com.dynatrace.hash4j.random.PseudoRandomGeneratorProviderForTesting; -import java.util.Arrays; -import java.util.SplittableRandom; -import org.hipparchus.stat.inference.AlternativeHypothesis; -import org.hipparchus.stat.inference.BinomialTest; -import org.hipparchus.stat.inference.ChiSquareTest; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; -class ConsistentJumpBucketHasherTest { +class ConsistentJumpBucketHasherTest extends AbstractConsistentBucketHasherTest { - @Test - void testIllegalNumBuckets() { - ConsistentBucketHasher consistentBucketHasher = - ConsistentHashing.jumpHash(PseudoRandomGeneratorProvider.splitMix64_V1()); - assertThatIllegalArgumentException().isThrownBy(() -> consistentBucketHasher.getBucket(0L, 0)); - assertThatIllegalArgumentException().isThrownBy(() -> consistentBucketHasher.getBucket(0L, -1)); + @Override + protected ConsistentBucketHasher getConsistentBucketHasher( + PseudoRandomGeneratorProvider pseudoRandomGeneratorProvider) { + return ConsistentHashing.jumpHash(pseudoRandomGeneratorProvider); } - @Test - void testNullPseudoRandomNumberGenerator() { - assertThatNullPointerException().isThrownBy(() -> ConsistentHashing.jumpHash(null)); - } - - @Test - void testUniformDistribution() { - - int numBuckets = 10; - int numCycles = 100000; - long[] counts = new long[numBuckets]; - double[] expected = new 
double[numBuckets]; - Arrays.fill(expected, 1.0); - ConsistentBucketHasher consistentBucketHasher = - ConsistentHashing.jumpHash(PseudoRandomGeneratorProvider.splitMix64_V1()); - - SplittableRandom random = new SplittableRandom(0x392c64621adad448L); - for (int i = 0; i < numCycles; ++i) { - int bucketIdx = consistentBucketHasher.getBucket(random.nextLong(), numBuckets); - counts[bucketIdx] += 1; - } - - double pValue = new ChiSquareTest().chiSquareTest(expected, counts); - assertThat(pValue).isGreaterThan(0.01); - } - - @Test - void testOptimalRedistribution() { - - int numBuckets = 10; - int numCycles = 100000; - ConsistentBucketHasher consistentBucketHasher = - ConsistentHashing.jumpHash(PseudoRandomGeneratorProvider.splitMix64_V1()); - - SplittableRandom random = new SplittableRandom(0x08b6fbb0a6626254L); - int countNewBucket = 0; - for (int i = 0; i < numCycles; ++i) { - long hash = random.nextLong(); - int oldBucketIdx = consistentBucketHasher.getBucket(hash, numBuckets); - int newBucketIdx = consistentBucketHasher.getBucket(hash, numBuckets + 1); - if (oldBucketIdx != newBucketIdx) { - assertThat(newBucketIdx).isEqualTo(numBuckets); - countNewBucket += 1; - } - } - - double pValue = - new BinomialTest() - .binomialTest( - numCycles, countNewBucket, 1. 
/ (numBuckets + 1), AlternativeHypothesis.TWO_SIDED); - assertThat(pValue).isGreaterThan(0.01); - } - - @ParameterizedTest - @ValueSource( - doubles = { - Double.NEGATIVE_INFINITY, - -Double.MAX_VALUE, - -2, - -1, - 0., - 1., - 2, - Double.MAX_VALUE, - Double.POSITIVE_INFINITY, - Double.NaN - }) - void testInvalidPseudoRandomGenerator(double randomValue) { - PseudoRandomGeneratorProviderForTesting pseudoRandomGeneratorProvider = - new PseudoRandomGeneratorProviderForTesting(); - - ConsistentBucketHasher consistentBucketHasher = - ConsistentHashing.jumpHash(pseudoRandomGeneratorProvider); - - pseudoRandomGeneratorProvider.setDoubleValue(randomValue); - assertThatNoException() - .isThrownBy(() -> consistentBucketHasher.getBucket(0x82739fa8da9a7728L, 10)); + @Override + protected long getCheckSum() { + return 0x42cf069c52a4ee21L; } } diff --git a/src/test/java/com/dynatrace/hash4j/consistent/ImprovedConsistentWeightedSamplingTest.java b/src/test/java/com/dynatrace/hash4j/consistent/ImprovedConsistentWeightedSamplingTest.java new file mode 100644 index 00000000..b72e729a --- /dev/null +++ b/src/test/java/com/dynatrace/hash4j/consistent/ImprovedConsistentWeightedSamplingTest.java @@ -0,0 +1,63 @@ +/* + * Copyright 2023 Dynatrace LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.dynatrace.hash4j.consistent; + +import static org.assertj.core.api.Assertions.assertThatNoException; + +import com.dynatrace.hash4j.random.PseudoRandomGeneratorProvider; +import com.dynatrace.hash4j.random.PseudoRandomGeneratorProviderForTesting; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +class ImprovedConsistentWeightedSamplingTest extends AbstractConsistentBucketHasherTest { + + @Override + protected ConsistentBucketHasher getConsistentBucketHasher( + PseudoRandomGeneratorProvider pseudoRandomGeneratorProvider) { + return ConsistentHashing.improvedConsistentWeightedSampling(pseudoRandomGeneratorProvider); + } + + @Override + protected long getCheckSum() { + return 0x41b4e6aa922fae85L; + } + + @ParameterizedTest + @ValueSource( + doubles = { + Double.NEGATIVE_INFINITY, + -Double.MAX_VALUE, + -2, + -1, + 0., + 1., + 2, + Double.MAX_VALUE, + Double.POSITIVE_INFINITY, + Double.NaN + }) + void testInvalidPseudoRandomGeneratorNextExponential(double randomValue) { + PseudoRandomGeneratorProviderForTesting pseudoRandomGeneratorProvider = + new PseudoRandomGeneratorProviderForTesting(); + + ConsistentBucketHasher consistentBucketHasher = + getConsistentBucketHasher(pseudoRandomGeneratorProvider); + + pseudoRandomGeneratorProvider.setExponentialValue(randomValue); + assertThatNoException() + .isThrownBy(() -> consistentBucketHasher.getBucket(0x82739fa8da9a7728L, 10)); + } +}