diff --git a/pom.xml b/pom.xml
index 8e589f1..045ad3a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -121,6 +121,13 @@
${junit.jupiter.version}
test
+
+
+ org.junit.jupiter
+ junit-jupiter-params
+ ${junit.jupiter.version}
+ test
+
com.github.tomakehurst
@@ -134,6 +141,20 @@
+
+
+ org.openjdk.jmh
+ jmh-core
+ 1.36
+ test
+
+
+
+ org.openjdk.jmh
+ jmh-generator-annprocess
+ 1.36
+ test
+
org.apache.logging.log4j
diff --git a/src/test/java/de/uni_jena/cs/fusion/abecto/processor/ComparisonBenchmarkDataSupplier.java b/src/test/java/de/uni_jena/cs/fusion/abecto/processor/ComparisonBenchmarkDataSupplier.java
index 28ca91d..a0c65b3 100644
--- a/src/test/java/de/uni_jena/cs/fusion/abecto/processor/ComparisonBenchmarkDataSupplier.java
+++ b/src/test/java/de/uni_jena/cs/fusion/abecto/processor/ComparisonBenchmarkDataSupplier.java
@@ -1,7 +1,21 @@
+/**
+ * Copyright © 2019 Heinz Nixdorf Chair for Distributed Information Systems, Friedrich Schiller University Jena (http://www.fusion.uni-jena.de/)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package de.uni_jena.cs.fusion.abecto.processor;
import com.google.common.collect.Streams;
-import de.uni_jena.cs.fusion.abecto.Aspect;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.ResourceFactory;
@@ -12,11 +26,11 @@
import java.util.stream.IntStream;
import java.util.stream.Stream;
-public class ComparisonBenchmarkDataSupplier extends PopulationComparisonProcessor {
+public class ComparisonBenchmarkDataSupplier {
+ protected static final RDFNode correctValue = ResourceFactory.createTypedLiteral(-1);
private final int populationSize, totalDatasetCount;
private final double coverage, errorRate;
- private final RDFNode correctValue;
private final RDFNode[] wrongValues;
public ComparisonBenchmarkDataSupplier(int populationSize, int datasetCount, double coverage, double errorRate) {
@@ -24,8 +38,7 @@ public ComparisonBenchmarkDataSupplier(int populationSize, int datasetCount, dou
this.totalDatasetCount = datasetCount;
this.coverage = coverage;
this.errorRate = errorRate;
- // generate correct and wrong values per dataset
- this.correctValue = ResourceFactory.createTypedLiteral(-1);
+ // generate wrong values per dataset
// wrongValues[0] should not be used and exists to improve code readability by avoiding -1 shifts later on
this.wrongValues = new RDFNode[datasetCount + 1];
for (int i = 0; i <= datasetCount; i++) {
@@ -37,19 +50,17 @@ public Set getDatasets() {
return IntStream.range(0, totalDatasetCount).mapToObj(i -> ResourceFactory.createResource(Integer.toString(i))).collect(Collectors.toSet());
}
- public Stream getResourceKeys(@SuppressWarnings("unused") Aspect aspect, Resource dataset) throws NullPointerException {
+ public Stream getResourceKeys(Resource dataset) throws NullPointerException {
int datasetNumber = Integer.parseInt(dataset.getURI());
return IntStream.range(0, populationSize).map(i -> i + datasetNumber * populationSize).mapToObj(i -> ResourceFactory.createResource(Integer.toString(i)));
}
public Map> selectResourceValues(Resource resource, Resource dataset,
- @SuppressWarnings("unused") Aspect aspect,
Collection variables) {
int resourceNumber = Integer.parseInt(resource.getURI());
if (resourceNumber % populationSize >= populationSize * errorRate) {
- return Collections.singletonMap(variables.iterator().next(),
- Collections.singleton(this.correctValue));
+ return Collections.singletonMap(variables.iterator().next(), Collections.singleton(this.correctValue));
} else {
int datasetNumber = Integer.parseInt(dataset.getURI());
return Collections.singletonMap(variables.iterator().next(),
@@ -58,8 +69,7 @@ public Map> selectResourceValues(Resource resource, Resourc
}
public Map>> selectResourceValues(Collection resources,
- Resource dataset,
- @SuppressWarnings("unused") Aspect aspect, List variables) {
+ Resource dataset, List variables) {
int datasetNumber = Integer.parseInt(dataset.getURI());
Map>> resourceValues = new HashMap<>();
@@ -68,7 +78,7 @@ public Map>> selectResourceValues(Collection<
int resourceNumber = Integer.parseInt(resource.getURI());
if (resourceNumber % populationSize >= populationSize * errorRate) {
resourceValues.put(resource, Collections.singletonMap(variables.iterator().next(),
- Collections.singleton(this.correctValue)));
+ Collections.singleton(correctValue)));
} else {
resourceValues.put(resource, Collections.singletonMap(variables.iterator().next(),
Collections.singleton(this.wrongValues[datasetNumber])));
@@ -78,7 +88,8 @@ public Map>> selectResourceValues(Collection<
}
private double overlapShare(int overlappingDatasetCount, int totalDatasetCount, double coverage) {
- return Math.pow(coverage, overlappingDatasetCount - 1) * Math.pow(1 - coverage, totalDatasetCount - overlappingDatasetCount);
+ return Math.pow(coverage, overlappingDatasetCount - 1) * Math.pow(1 - coverage,
+ totalDatasetCount - overlappingDatasetCount);
}
@@ -97,8 +108,7 @@ public Stream> getCorrespondenceGroups() {
(int) (populationSize * errorRate), (int) (populationSize * (1 - errorRate)));
// join cases streams
- @SuppressWarnings("unchecked")
- Stream>[] casesStreams = new Stream[(1 << totalDatasetCount) * 2];
+ @SuppressWarnings("unchecked") Stream>[] casesStreams = new Stream[(1 << totalDatasetCount) * 2];
System.arraycopy(errorCasesStreams, 0, casesStreams, 0, 1 << totalDatasetCount);
System.arraycopy(correctCasesStreams, 0, casesStreams, 1 << totalDatasetCount, 1 << totalDatasetCount);
return Streams.concat(casesStreams);
@@ -119,19 +129,17 @@ private Stream>[] generateCasesStream(double[] overlapShare, int
int[] nextId = new int[totalDatasetCount];
Arrays.fill(nextId, idOffset);
- @SuppressWarnings("unchecked")
- Stream>[] casesStreams = new Stream[1 << totalDatasetCount];
+ @SuppressWarnings("unchecked") Stream>[] casesStreams = new Stream[1 << totalDatasetCount];
// iterate through all subsets represented by the bits of an int, 0 = not contained, 1 = contained
- for (int coveredDatasetsBits = 0; coveredDatasetsBits < 1 << totalDatasetCount /* = 2^{datasetCount}
- */; coveredDatasetsBits++) {
+ int subsetCount = 1 << totalDatasetCount; // = 2^{datasetCount}
+ for (int coveredDatasetsBits = 0; coveredDatasetsBits < subsetCount; coveredDatasetsBits++) {
int coveredDatasetsCount = Integer.bitCount(coveredDatasetsBits);
if (coveredDatasetsCount >= 2) {
- // get array of numbers of covered datasets
+ // get array of covered dataset ids
int[] coveredDatasets = new int[coveredDatasetsCount];
- int i = 0;
- for (int dataset = 0; dataset < totalDatasetCount; dataset++) {
+ for (int dataset = 0, i = 0; dataset < totalDatasetCount; dataset++) {
if ((coveredDatasetsBits & (1 << dataset /* = 2^{dataset} */)) != 0) {
coveredDatasets[i++] = dataset;
}
@@ -140,7 +148,8 @@ private Stream>[] generateCasesStream(double[] overlapShare, int
// calculate number of cases with covered datasets
int cases = (int) (overlapShare[coveredDatasetsCount] * totalCases);
// generate stream of cases with covered datasets
- casesStreams[coveredDatasetsBits] = Stream.generate(new CorrespondenceGroupSupplier(coveredDatasets, nextId)).limit(cases);
+ casesStreams[coveredDatasetsBits] = Stream.generate(new CorrespondenceGroupSupplier(coveredDatasets,
+ nextId)).limit(cases);
} else {
// empty stream for combinations without correspondences
casesStreams[coveredDatasetsBits] = Stream.empty();
@@ -163,7 +172,8 @@ public List get() {
List resources = Arrays.asList(new Resource[coveredDatasets.length]);
for (int i = 0; i < coveredDatasets.length; i++) {
int coveredDataset = coveredDatasets[i];
- resources.set(i, ResourceFactory.createResource(Integer.toString(nextId[coveredDataset]++ + coveredDataset * populationSize)));
+ resources.set(i,
+ ResourceFactory.createResource(Integer.toString(nextId[coveredDataset]++ + coveredDataset * populationSize)));
}
return resources;
}
diff --git a/src/test/java/de/uni_jena/cs/fusion/abecto/processor/ComparisonBenchmarkDataSupplierTest.java b/src/test/java/de/uni_jena/cs/fusion/abecto/processor/ComparisonBenchmarkDataSupplierTest.java
new file mode 100644
index 0000000..442b1d1
--- /dev/null
+++ b/src/test/java/de/uni_jena/cs/fusion/abecto/processor/ComparisonBenchmarkDataSupplierTest.java
@@ -0,0 +1,108 @@
+/**
+ * Copyright © 2019 Heinz Nixdorf Chair for Distributed Information Systems, Friedrich Schiller University Jena
+ * (http://www.fusion.uni-jena.de/)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+ * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+package de.uni_jena.cs.fusion.abecto.processor;
+
+import org.apache.jena.rdf.model.Resource;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+public class ComparisonBenchmarkDataSupplierTest {
+
+ @ParameterizedTest(name = "[{index}] Data generation for {0} datasources.")
+ @ValueSource(ints = {2, 3, 4, 5, 6, 7, 8, 9, 10})
+ public void supplierTest(int datasetCount) {
+ int populationSize = 1000;
+ double coverage = 0.75, errorRate = 0.1;
+ ComparisonBenchmarkDataSupplier supplier = new ComparisonBenchmarkDataSupplier(populationSize, datasetCount,
+ coverage, errorRate);
+
+ // the supplier must expose exactly datasetCount datasets
+ List dataset = new ArrayList<>(supplier.getDatasets());
+ Assertions.assertEquals(datasetCount, dataset.size());
+
+ // each dataset must yield populationSize distinct resource keys (collected into a set, so duplicates would shrink it)
+ Set[] population = new Set[datasetCount];
+ for (int i = 0; i < datasetCount; i++) {
+ population[i] = supplier.getResourceKeys(dataset.get(i)).collect(Collectors.toSet());
+ Assertions.assertEquals(populationSize, population[i].size());
+ }
+
+ // per dataset, count resources with the correct value vs. a wrong value, using both selectResourceValues overloads
+ for (int i = 0; i < datasetCount; i++) {
+ int rightValuesSingle = 0, wrongValuesSingle = 0, rightValuesCollection = 0, wrongValuesCollection = 0;
+ for (Resource resource : population[i]) {
+ // overload taking a single resource: exactly one value must be returned for "var"
+ Assertions.assertEquals(1, supplier.selectResourceValues(resource, dataset.get(i),
+ Collections.singletonList("var")).get("var").size());
+ if (supplier.selectResourceValues(resource, dataset.get(i), Collections.singletonList("var")).get(
+ "var").contains(ComparisonBenchmarkDataSupplier.correctValue)) {
+ rightValuesSingle++;
+ } else {
+ wrongValuesSingle++;
+ }
+
+ // overload taking a resource collection: one variable entry for the resource, holding exactly one value
+ Assertions.assertEquals(1, supplier.selectResourceValues(Collections.singletonList(resource),
+ dataset.get(i), Collections.singletonList("var")).get(resource).size());
+ Assertions.assertEquals(1, supplier.selectResourceValues(Collections.singletonList(resource),
+ dataset.get(i), Collections.singletonList("var")).get(resource).get("var").size());
+ if (supplier.selectResourceValues(Collections.singletonList(resource), dataset.get(i),
+ Collections.singletonList("var")).get(resource).get("var").contains(ComparisonBenchmarkDataSupplier.correctValue)) {
+ rightValuesCollection++;
+ } else {
+ wrongValuesCollection++;
+ }
+ }
+ Assertions.assertEquals((int) (populationSize * errorRate), wrongValuesSingle);
+ Assertions.assertEquals((int) (populationSize * errorRate), wrongValuesCollection);
+ Assertions.assertEquals(populationSize - (int) (populationSize * errorRate), rightValuesSingle);
+ Assertions.assertEquals(populationSize - (int) (populationSize * errorRate), rightValuesCollection);
+ }
+
+
+ // pairwise coverage: pairs[i][j] (i < j) counts the correspondence groups containing resources of both dataset i and j
+ int[][] pairs = new int[datasetCount-1][datasetCount];
+ supplier.getCorrespondenceGroups().forEach(resources -> {
+ for (int i = 0; i < datasetCount; i++) {
+ if (resources.stream().anyMatch(population[i]::contains)) {
+ for (int j = i + 1; j < datasetCount; j++) {
+ if (resources.stream().anyMatch(population[j]::contains)) {
+ pairs[i][j]++;
+ }
+ }
+ }
+ }
+ });
+ int expectedCoverageMax = (int) (populationSize * coverage);
+ // allow underrun by 1 per overlap case the pair is involved in (2^{n-2}) and per right / wrong value case (x2)
+ int expectedCoverageMin = (int) (expectedCoverageMax - (Math.pow(2, datasetCount - 2) * 2));
+ int exampleCoverage = pairs[0][1];
+ for (int i = 0; i < datasetCount; i++) {
+ for (int j = i + 1; j < datasetCount; j++) {
+ // every pair must show the same coverage as the reference pair (0, 1)
+ Assertions.assertEquals(exampleCoverage,pairs[i][j]);
+ // check against a range to tolerate underrun due to rounding
+ Assertions.assertTrue(expectedCoverageMin <= pairs[i][j] && pairs[i][j] <= expectedCoverageMax,
+ String.format("%s <= %s <= %s", expectedCoverageMin, pairs[i][j], expectedCoverageMax));
+ }
+ }
+ }
+}
diff --git a/src/test/java/de/uni_jena/cs/fusion/abecto/processor/PopulationComparisonProcessorBenchmark.java b/src/test/java/de/uni_jena/cs/fusion/abecto/processor/PopulationComparisonProcessorBenchmark.java
index 8b43ec4..483b577 100644
--- a/src/test/java/de/uni_jena/cs/fusion/abecto/processor/PopulationComparisonProcessorBenchmark.java
+++ b/src/test/java/de/uni_jena/cs/fusion/abecto/processor/PopulationComparisonProcessorBenchmark.java
@@ -1,3 +1,18 @@
+/**
+ * Copyright © 2019 Heinz Nixdorf Chair for Distributed Information Systems, Friedrich Schiller University Jena (http://www.fusion.uni-jena.de/)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package de.uni_jena.cs.fusion.abecto.processor;
import de.uni_jena.cs.fusion.abecto.Aspect;
@@ -9,9 +24,9 @@
public class PopulationComparisonProcessorBenchmark {
- private class IndependentPopulationComparisonProcessor extends PopulationComparisonProcessor {
+ private static class IndependentPopulationComparisonProcessor extends PopulationComparisonProcessor {
- private ComparisonBenchmarkDataSupplier dataSupplier;
+ private final ComparisonBenchmarkDataSupplier dataSupplier;
public IndependentPopulationComparisonProcessor(int populationSize, int datasetCount, double coverage) {
this.dataSupplier = new ComparisonBenchmarkDataSupplier(populationSize, datasetCount, coverage, 0);
@@ -24,7 +39,7 @@ public Set getDatasets() {
@Override
public Stream getResourceKeys(Aspect aspect, Resource dataset) throws NullPointerException {
- return this.dataSupplier.getResourceKeys(aspect, dataset);
+ return this.dataSupplier.getResourceKeys(dataset);
}
public Stream> getCorrespondenceGroups() {
diff --git a/src/test/java/de/uni_jena/cs/fusion/abecto/processor/PropertyComparisonProcessorBenchmark.java b/src/test/java/de/uni_jena/cs/fusion/abecto/processor/PropertyComparisonProcessorBenchmark.java
index cfb8600..d667a86 100644
--- a/src/test/java/de/uni_jena/cs/fusion/abecto/processor/PropertyComparisonProcessorBenchmark.java
+++ b/src/test/java/de/uni_jena/cs/fusion/abecto/processor/PropertyComparisonProcessorBenchmark.java
@@ -1,3 +1,18 @@
+/**
+ * Copyright © 2019 Heinz Nixdorf Chair for Distributed Information Systems, Friedrich Schiller University Jena (http://www.fusion.uni-jena.de/)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package de.uni_jena.cs.fusion.abecto.processor;
import de.uni_jena.cs.fusion.abecto.Aspect;
@@ -9,9 +24,9 @@
public class PropertyComparisonProcessorBenchmark {
- private class IndependentPropertyComparisonProcessor extends PropertyComparisonProcessor {
+ private static class IndependentPropertyComparisonProcessor extends PropertyComparisonProcessor {
- private ComparisonBenchmarkDataSupplier dataSupplier;
+ private final ComparisonBenchmarkDataSupplier dataSupplier;
public IndependentPropertyComparisonProcessor(int populationSize, int datasetCount, double coverage) {
this.dataSupplier = new ComparisonBenchmarkDataSupplier(populationSize, datasetCount, coverage, 0);
@@ -23,21 +38,21 @@ public Set getDatasets() {
}
@Override
- public Stream getResourceKeys(Aspect aspect, Resource dataset) throws NullPointerException {
- return this.dataSupplier.getResourceKeys(aspect, dataset);
+ public Stream getResourceKeys(@SuppressWarnings("unused") Aspect aspect, Resource dataset) throws NullPointerException {
+ return this.dataSupplier.getResourceKeys(dataset);
}
public Map> selectResourceValues(Resource resource, Resource dataset,
@SuppressWarnings("unused") Aspect aspect,
Collection variables) {
- return this.dataSupplier.selectResourceValues(resource, dataset, aspect, variables);
+ return this.dataSupplier.selectResourceValues(resource, dataset, variables);
}
public Map>> selectResourceValues(Collection resources,
Resource dataset,
@SuppressWarnings("unused") Aspect aspect,
List variables) {
- return this.dataSupplier.selectResourceValues(resources, dataset, aspect, variables);
+ return this.dataSupplier.selectResourceValues(resources, dataset, variables);
}