test for comparison processor benchmark data generation

fusion-jena · Nov 1, 2023 · 3f91db8 · 3f91db8
1 parent c535ef9
commit 3f91db8
Show file tree

Hide file tree

Showing 5 changed files with 202 additions and 33 deletions.
diff --git a/pom.xml b/pom.xml
@@ -121,6 +121,13 @@
 			<version>${junit.jupiter.version}</version>
 			<scope>test</scope>
 		</dependency>
+		<dependency>
+			<!-- EPL 2.0 -->
+			<groupId>org.junit.jupiter</groupId>
+			<artifactId>junit-jupiter-params</artifactId>
+			<version>${junit.jupiter.version}</version>
+			<scope>test</scope>
+		</dependency>
 		<dependency>
 			<!-- Apache 2.0 -->
 			<groupId>com.github.tomakehurst</groupId>
@@ -134,6 +141,20 @@
 				</exclusion>
 			</exclusions>
 		</dependency>
+		<dependency>
+			<!-- GPL 2 with classpath exception -->
+			<groupId>org.openjdk.jmh</groupId>
+			<artifactId>jmh-core</artifactId>
+			<version>1.36</version>
+			<scope>test</scope>
+		</dependency>
+		<dependency>
+			<!-- GPL 2 with classpath exception -->
+			<groupId>org.openjdk.jmh</groupId>
+			<artifactId>jmh-generator-annprocess</artifactId>
+			<version>1.36</version>
+			<scope>test</scope>
+		</dependency>
 		<dependency>
 			<!-- Apache 2.0 -->
 			<groupId>org.apache.logging.log4j</groupId>

diff --git a/src/test/java/de/uni_jena/cs/fusion/abecto/processor/ComparisonBenchmarkDataSupplier.java b/src/test/java/de/uni_jena/cs/fusion/abecto/processor/ComparisonBenchmarkDataSupplier.java
@@ -1,7 +1,21 @@
+/**
+ * Copyright © 2019 Heinz Nixdorf Chair for Distributed Information Systems, Friedrich Schiller University Jena (http://www.fusion.uni-jena.de/)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package de.uni_jena.cs.fusion.abecto.processor;
 
 import com.google.common.collect.Streams;
-import de.uni_jena.cs.fusion.abecto.Aspect;
 import org.apache.jena.rdf.model.RDFNode;
 import org.apache.jena.rdf.model.Resource;
 import org.apache.jena.rdf.model.ResourceFactory;
@@ -12,20 +26,19 @@
 import java.util.stream.IntStream;
 import java.util.stream.Stream;
 
-public class ComparisonBenchmarkDataSupplier extends PopulationComparisonProcessor {
+public class ComparisonBenchmarkDataSupplier {
 
+    protected static final RDFNode correctValue = ResourceFactory.createTypedLiteral(-1);
     private final int populationSize, totalDatasetCount;
     private final double coverage, errorRate;
-    private final RDFNode correctValue;
     private final RDFNode[] wrongValues;
 
     public ComparisonBenchmarkDataSupplier(int populationSize, int datasetCount, double coverage, double errorRate) {
         this.populationSize = populationSize;
         this.totalDatasetCount = datasetCount;
         this.coverage = coverage;
         this.errorRate = errorRate;
-        // generate correct and wrong values per dataset
-        this.correctValue = ResourceFactory.createTypedLiteral(-1);
+        // generate wrong values per dataset
         // wrongValues[0] should not be used and exists to improve code readability by avoiding -1 shifts later on
         this.wrongValues = new RDFNode[datasetCount + 1];
         for (int i = 0; i <= datasetCount; i++) {
@@ -37,19 +50,17 @@ public Set<Resource> getDatasets() {
         return IntStream.range(0, totalDatasetCount).mapToObj(i -> ResourceFactory.createResource(Integer.toString(i))).collect(Collectors.toSet());
     }
 
-    public Stream<Resource> getResourceKeys(@SuppressWarnings("unused") Aspect aspect, Resource dataset) throws NullPointerException {
+    public Stream<Resource> getResourceKeys(Resource dataset) throws NullPointerException {
         int datasetNumber = Integer.parseInt(dataset.getURI());
         return IntStream.range(0, populationSize).map(i -> i + datasetNumber * populationSize).mapToObj(i -> ResourceFactory.createResource(Integer.toString(i)));
     }
 
     public Map<String, Set<RDFNode>> selectResourceValues(Resource resource, Resource dataset,
-                                                          @SuppressWarnings("unused") Aspect aspect,
                                                           Collection<String> variables) {
 
         int resourceNumber = Integer.parseInt(resource.getURI());
         if (resourceNumber % populationSize >= populationSize * errorRate) {
-            return Collections.singletonMap(variables.iterator().next(),
-                    Collections.singleton(this.correctValue));
+            return Collections.singletonMap(variables.iterator().next(), Collections.singleton(this.correctValue));
         } else {
             int datasetNumber = Integer.parseInt(dataset.getURI());
             return Collections.singletonMap(variables.iterator().next(),
@@ -58,8 +69,7 @@ public Map<String, Set<RDFNode>> selectResourceValues(Resource resource, Resourc
     }
 
     public Map<Resource, Map<String, Set<RDFNode>>> selectResourceValues(Collection<Resource> resources,
-                                                                         Resource dataset,
-                                                                         @SuppressWarnings("unused") Aspect aspect, List<String> variables) {
+                                                                         Resource dataset, List<String> variables) {
         int datasetNumber = Integer.parseInt(dataset.getURI());
 
         Map<Resource, Map<String, Set<RDFNode>>> resourceValues = new HashMap<>();
@@ -68,7 +78,7 @@ public Map<Resource, Map<String, Set<RDFNode>>> selectResourceValues(Collection<
             int resourceNumber = Integer.parseInt(resource.getURI());
             if (resourceNumber % populationSize >= populationSize * errorRate) {
                 resourceValues.put(resource, Collections.singletonMap(variables.iterator().next(),
-                        Collections.singleton(this.correctValue)));
+                        Collections.singleton(correctValue)));
             } else {
                 resourceValues.put(resource, Collections.singletonMap(variables.iterator().next(),
                         Collections.singleton(this.wrongValues[datasetNumber])));
@@ -78,7 +88,8 @@ public Map<Resource, Map<String, Set<RDFNode>>> selectResourceValues(Collection<
     }
 
     private double overlapShare(int overlappingDatasetCount, int totalDatasetCount, double coverage) {
-        return Math.pow(coverage, overlappingDatasetCount - 1) * Math.pow(1 - coverage, totalDatasetCount - overlappingDatasetCount);
+        return Math.pow(coverage, overlappingDatasetCount - 1) * Math.pow(1 - coverage,
+                totalDatasetCount - overlappingDatasetCount);
     }
 
 
@@ -97,8 +108,7 @@ public Stream<List<Resource>> getCorrespondenceGroups() {
                 (int) (populationSize * errorRate), (int) (populationSize * (1 - errorRate)));
 
         // join cases streams
-        @SuppressWarnings("unchecked")
-        Stream<List<Resource>>[] casesStreams = new Stream[(1 << totalDatasetCount) * 2];
+        @SuppressWarnings("unchecked") Stream<List<Resource>>[] casesStreams = new Stream[(1 << totalDatasetCount) * 2];
         System.arraycopy(errorCasesStreams, 0, casesStreams, 0, 1 << totalDatasetCount);
         System.arraycopy(correctCasesStreams, 0, casesStreams, 1 << totalDatasetCount, 1 << totalDatasetCount);
         return Streams.concat(casesStreams);
@@ -119,19 +129,17 @@ private Stream<List<Resource>>[] generateCasesStream(double[] overlapShare, int
         int[] nextId = new int[totalDatasetCount];
         Arrays.fill(nextId, idOffset);
 
-        @SuppressWarnings("unchecked")
-        Stream<List<Resource>>[] casesStreams = new Stream[1 << totalDatasetCount];
+        @SuppressWarnings("unchecked") Stream<List<Resource>>[] casesStreams = new Stream[1 << totalDatasetCount];
         // iterate through all subsets represented by the bits of an int, 0 = not contained, 1 = contained
-        for (int coveredDatasetsBits = 0; coveredDatasetsBits < 1 << totalDatasetCount /* = 2^{datasetCount}
-         */; coveredDatasetsBits++) {
+        int subsetCount = 1 << totalDatasetCount; // = 2^{datasetCount}
+        for (int coveredDatasetsBits = 0; coveredDatasetsBits < subsetCount; coveredDatasetsBits++) {
             int coveredDatasetsCount = Integer.bitCount(coveredDatasetsBits);
 
 
             if (coveredDatasetsCount >= 2) {
-                // get array of numbers of covered datasets
+                // get array of covered dataset ids
                 int[] coveredDatasets = new int[coveredDatasetsCount];
-                int i = 0;
-                for (int dataset = 0; dataset < totalDatasetCount; dataset++) {
+                for (int dataset = 0, i = 0; dataset < totalDatasetCount; dataset++) {
                     if ((coveredDatasetsBits & (1 << dataset  /* = 2^{dataset} */)) != 0) {
                         coveredDatasets[i++] = dataset;
                     }
@@ -140,7 +148,8 @@ private Stream<List<Resource>>[] generateCasesStream(double[] overlapShare, int
                 // calculate number of cases with covered datasets
                 int cases = (int) (overlapShare[coveredDatasetsCount] * totalCases);
                 // generate stream of cases with covered datasets
-                casesStreams[coveredDatasetsBits] = Stream.generate(new CorrespondenceGroupSupplier(coveredDatasets, nextId)).limit(cases);
+                casesStreams[coveredDatasetsBits] = Stream.generate(new CorrespondenceGroupSupplier(coveredDatasets,
+                        nextId)).limit(cases);
             } else {
                 // empty stream for combinations without correspondences
                 casesStreams[coveredDatasetsBits] = Stream.empty();
@@ -163,7 +172,8 @@ public List<Resource> get() {
             List<Resource> resources = Arrays.asList(new Resource[coveredDatasets.length]);
             for (int i = 0; i < coveredDatasets.length; i++) {
                 int coveredDataset = coveredDatasets[i];
-                resources.set(i, ResourceFactory.createResource(Integer.toString(nextId[coveredDataset]++ + coveredDataset * populationSize)));
+                resources.set(i,
+                        ResourceFactory.createResource(Integer.toString(nextId[coveredDataset]++ + coveredDataset * populationSize)));
             }
             return resources;
         }

diff --git a/...test/java/de/uni_jena/cs/fusion/abecto/processor/ComparisonBenchmarkDataSupplierTest.java b/...test/java/de/uni_jena/cs/fusion/abecto/processor/ComparisonBenchmarkDataSupplierTest.java
@@ -0,0 +1,108 @@
+/**
+ * Copyright © 2019 Heinz Nixdorf Chair for Distributed Information Systems, Friedrich Schiller University Jena
+ * (http://www.fusion.uni-jena.de/)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+package de.uni_jena.cs.fusion.abecto.processor;
+
+import org.apache.jena.rdf.model.Resource;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+public class ComparisonBenchmarkDataSupplierTest { // checks the synthetic data generated by ComparisonBenchmarkDataSupplier
+
+    @ParameterizedTest(name = "[{index}] Data generation for {0} datasources.")
+    @ValueSource(ints = {2, 3, 4, 5, 6, 7, 8, 9, 10}) // the test is executed once per dataset count
+    public void supplierTest(int datasetCount) {
+        int populationSize = 1000; // number of resources expected per dataset
+        double coverage = 0.75, errorRate = 0.1; // generation parameters handed to the supplier
+        ComparisonBenchmarkDataSupplier supplier = new ComparisonBenchmarkDataSupplier(populationSize, datasetCount,
+                coverage, errorRate);
+
+        // check that the supplier provides exactly datasetCount datasets
+        List<Resource> dataset = new ArrayList<>(supplier.getDatasets()); // copied into a list for indexed access
+        Assertions.assertEquals(datasetCount, dataset.size());
+
+        // check that each dataset provides populationSize distinct resource keys
+        Set<Resource>[] population = new Set[datasetCount]; // population[i] = resource keys of dataset.get(i)
+        for (int i = 0; i < datasetCount; i++) {
+            population[i] = supplier.getResourceKeys(dataset.get(i)).collect(Collectors.toSet());
+            Assertions.assertEquals(populationSize, population[i].size());
+        }
+
+        // check that both selectResourceValues variants return one value per resource and the expected share of wrong values
+        for (int i = 0; i < datasetCount; i++) {
+            int rightValuesSingle = 0, wrongValuesSingle = 0, rightValuesCollection = 0, wrongValuesCollection = 0;
+            for (Resource resource : population[i]) {
+                // variant taking a single resource: exactly one value for "var", counted as right or wrong
+                Assertions.assertEquals(1, supplier.selectResourceValues(resource, dataset.get(i),
+                        Collections.singletonList("var")).get("var").size());
+                if (supplier.selectResourceValues(resource, dataset.get(i), Collections.singletonList("var")).get(
+                        "var").contains(ComparisonBenchmarkDataSupplier.correctValue)) {
+                    rightValuesSingle++;
+                } else {
+                    wrongValuesSingle++;
+                }
+
+                // variant taking a collection of resources: one variable entry with exactly one value for the resource
+                Assertions.assertEquals(1, supplier.selectResourceValues(Collections.singletonList(resource),
+                        dataset.get(i), Collections.singletonList("var")).get(resource).size());
+                Assertions.assertEquals(1, supplier.selectResourceValues(Collections.singletonList(resource),
+                        dataset.get(i), Collections.singletonList("var")).get(resource).get("var").size());
+                if (supplier.selectResourceValues(Collections.singletonList(resource), dataset.get(i),
+                        Collections.singletonList("var")).get(resource).get("var").contains(ComparisonBenchmarkDataSupplier.correctValue)) {
+                    rightValuesCollection++;
+                } else {
+                    wrongValuesCollection++;
+                }
+            }
+            Assertions.assertEquals((int) (populationSize * errorRate), wrongValuesSingle); // expected wrong values: populationSize * errorRate, truncated
+            Assertions.assertEquals((int) (populationSize * errorRate), wrongValuesCollection);
+            Assertions.assertEquals(populationSize - (int) (populationSize * errorRate), rightValuesSingle); // all remaining resources must carry the correct value
+            Assertions.assertEquals(populationSize - (int) (populationSize * errorRate), rightValuesCollection);
+        }
+
+
+        // count for each dataset pair (i, j) with i < j the correspondence groups containing resources of both datasets
+        int[][] pairs = new int[datasetCount-1][datasetCount]; // only the entries with i < j are used
+        supplier.getCorrespondenceGroups().forEach(resources -> {
+            for (int i = 0; i < datasetCount; i++) {
+                if (resources.stream().anyMatch(population[i]::contains)) {
+                    for (int j = i + 1; j < datasetCount; j++) {
+                        if (resources.stream().anyMatch(population[j]::contains)) {
+                            pairs[i][j]++;
+                        }
+                    }
+                }
+            }
+        });
+        int expectedCoverageMax = (int) (populationSize * coverage); // upper bound of groups shared by a pair
+        // allow underrun by 1 per overlap case the pair is involved in (2^{n-2}) and per right / wrong value case (x2)
+        int expectedCoverageMin = (int) (expectedCoverageMax - (Math.pow(2, datasetCount - 2) * 2));
+        int exampleCoverage = pairs[0][1]; // reference count all other pairs are compared with
+        for (int i = 0; i < datasetCount; i++) {
+            for (int j = i + 1; j < datasetCount; j++) {
+                // check for equal coverage of all pairs
+                Assertions.assertEquals(exampleCoverage,pairs[i][j]);
+                // check for a range to accept underrun due to rounding
+                Assertions.assertTrue(expectedCoverageMin <= pairs[i][j] && pairs[i][j] <= expectedCoverageMax,
+                        String.format("%s <= %s <= %s", expectedCoverageMin, pairs[i][j], expectedCoverageMax));
+            }
+        }
+    }
+}
diff --git a/...t/java/de/uni_jena/cs/fusion/abecto/processor/PopulationComparisonProcessorBenchmark.java b/...t/java/de/uni_jena/cs/fusion/abecto/processor/PopulationComparisonProcessorBenchmark.java
@@ -1,3 +1,18 @@
+/**
+ * Copyright © 2019 Heinz Nixdorf Chair for Distributed Information Systems, Friedrich Schiller University Jena (http://www.fusion.uni-jena.de/)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package de.uni_jena.cs.fusion.abecto.processor;
 
 import de.uni_jena.cs.fusion.abecto.Aspect;
@@ -9,9 +24,9 @@
 
 public class PopulationComparisonProcessorBenchmark {
 
-    private class IndependentPopulationComparisonProcessor extends PopulationComparisonProcessor {
+    private static class IndependentPopulationComparisonProcessor extends PopulationComparisonProcessor {
 
-        private ComparisonBenchmarkDataSupplier dataSupplier;
+        private final ComparisonBenchmarkDataSupplier dataSupplier;
 
         public IndependentPopulationComparisonProcessor(int populationSize, int datasetCount, double coverage) {
             this.dataSupplier = new ComparisonBenchmarkDataSupplier(populationSize, datasetCount, coverage, 0);
@@ -24,7 +39,7 @@ public Set<Resource> getDatasets() {
 
         @Override
         public Stream<Resource> getResourceKeys(Aspect aspect, Resource dataset) throws NullPointerException {
-            return this.dataSupplier.getResourceKeys(aspect, dataset);
+            return this.dataSupplier.getResourceKeys(dataset);
         }
 
         public Stream<List<Resource>> getCorrespondenceGroups() {