diff --git a/pom.xml b/pom.xml index 8e589f1..045ad3a 100644 --- a/pom.xml +++ b/pom.xml @@ -121,6 +121,13 @@ ${junit.jupiter.version} test + + + org.junit.jupiter + junit-jupiter-params + ${junit.jupiter.version} + test + com.github.tomakehurst @@ -134,6 +141,20 @@ + + + org.openjdk.jmh + jmh-core + 1.36 + test + + + + org.openjdk.jmh + jmh-generator-annprocess + 1.36 + test + org.apache.logging.log4j diff --git a/src/test/java/de/uni_jena/cs/fusion/abecto/processor/ComparisonBenchmarkDataSupplier.java b/src/test/java/de/uni_jena/cs/fusion/abecto/processor/ComparisonBenchmarkDataSupplier.java index 28ca91d..a0c65b3 100644 --- a/src/test/java/de/uni_jena/cs/fusion/abecto/processor/ComparisonBenchmarkDataSupplier.java +++ b/src/test/java/de/uni_jena/cs/fusion/abecto/processor/ComparisonBenchmarkDataSupplier.java @@ -1,7 +1,21 @@ +/** + * Copyright © 2019 Heinz Nixdorf Chair for Distributed Information Systems, Friedrich Schiller University Jena (http://www.fusion.uni-jena.de/) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package de.uni_jena.cs.fusion.abecto.processor; import com.google.common.collect.Streams; -import de.uni_jena.cs.fusion.abecto.Aspect; import org.apache.jena.rdf.model.RDFNode; import org.apache.jena.rdf.model.Resource; import org.apache.jena.rdf.model.ResourceFactory; @@ -12,11 +26,11 @@ import java.util.stream.IntStream; import java.util.stream.Stream; -public class ComparisonBenchmarkDataSupplier extends PopulationComparisonProcessor { +public class ComparisonBenchmarkDataSupplier { + protected static final RDFNode correctValue = ResourceFactory.createTypedLiteral(-1); private final int populationSize, totalDatasetCount; private final double coverage, errorRate; - private final RDFNode correctValue; private final RDFNode[] wrongValues; public ComparisonBenchmarkDataSupplier(int populationSize, int datasetCount, double coverage, double errorRate) { @@ -24,8 +38,7 @@ public ComparisonBenchmarkDataSupplier(int populationSize, int datasetCount, dou this.totalDatasetCount = datasetCount; this.coverage = coverage; this.errorRate = errorRate; - // generate correct and wrong values per dataset - this.correctValue = ResourceFactory.createTypedLiteral(-1); + // generate wrong values per dataset // wrongValues[0] should not be used and exists to improve code readability by avoiding -1 shifts later on this.wrongValues = new RDFNode[datasetCount + 1]; for (int i = 0; i <= datasetCount; i++) { @@ -37,19 +50,17 @@ public Set getDatasets() { return IntStream.range(0, totalDatasetCount).mapToObj(i -> ResourceFactory.createResource(Integer.toString(i))).collect(Collectors.toSet()); } - public Stream getResourceKeys(@SuppressWarnings("unused") Aspect aspect, Resource dataset) throws NullPointerException { + public Stream getResourceKeys(Resource dataset) throws NullPointerException { int datasetNumber = Integer.parseInt(dataset.getURI()); return IntStream.range(0, populationSize).map(i -> i + datasetNumber * populationSize).mapToObj(i -> ResourceFactory.createResource(Integer.toString(i))); } public Map> selectResourceValues(Resource resource, Resource dataset, - @SuppressWarnings("unused") Aspect aspect, Collection variables) { int resourceNumber = Integer.parseInt(resource.getURI()); if (resourceNumber % populationSize >= populationSize * errorRate) { - return Collections.singletonMap(variables.iterator().next(), - Collections.singleton(this.correctValue)); + return Collections.singletonMap(variables.iterator().next(), Collections.singleton(this.correctValue)); } else { int datasetNumber = Integer.parseInt(dataset.getURI()); return Collections.singletonMap(variables.iterator().next(), @@ -58,8 +69,7 @@ public Map> selectResourceValues(Resource resource, Resourc } public Map>> selectResourceValues(Collection resources, - Resource dataset, - @SuppressWarnings("unused") Aspect aspect, List variables) { + Resource dataset, List variables) { int datasetNumber = Integer.parseInt(dataset.getURI()); Map>> resourceValues = new HashMap<>(); @@ -68,7 +78,7 @@ public Map>> selectResourceValues(Collection< int resourceNumber = Integer.parseInt(resource.getURI()); if (resourceNumber % populationSize >= populationSize * errorRate) { resourceValues.put(resource, Collections.singletonMap(variables.iterator().next(), - Collections.singleton(this.correctValue))); + Collections.singleton(correctValue))); } else { resourceValues.put(resource, Collections.singletonMap(variables.iterator().next(), Collections.singleton(this.wrongValues[datasetNumber]))); @@ -78,7 +88,8 @@ public Map>> selectResourceValues(Collection< } private double overlapShare(int overlappingDatasetCount, int totalDatasetCount, double coverage) { - return Math.pow(coverage, overlappingDatasetCount - 1) * Math.pow(1 - coverage, totalDatasetCount - overlappingDatasetCount); + return Math.pow(coverage, overlappingDatasetCount - 1) * Math.pow(1 - coverage, + totalDatasetCount - overlappingDatasetCount); } @@ -97,8 +108,7 @@ public Stream> getCorrespondenceGroups() { (int) (populationSize * errorRate), (int) (populationSize * (1 - errorRate))); // join cases streams - @SuppressWarnings("unchecked") - Stream>[] casesStreams = new Stream[(1 << totalDatasetCount) * 2]; + @SuppressWarnings("unchecked") Stream>[] casesStreams = new Stream[(1 << totalDatasetCount) * 2]; System.arraycopy(errorCasesStreams, 0, casesStreams, 0, 1 << totalDatasetCount); System.arraycopy(correctCasesStreams, 0, casesStreams, 1 << totalDatasetCount, 1 << totalDatasetCount); return Streams.concat(casesStreams); @@ -119,19 +129,17 @@ private Stream>[] generateCasesStream(double[] overlapShare, int int[] nextId = new int[totalDatasetCount]; Arrays.fill(nextId, idOffset); - @SuppressWarnings("unchecked") - Stream>[] casesStreams = new Stream[1 << totalDatasetCount]; + @SuppressWarnings("unchecked") Stream>[] casesStreams = new Stream[1 << totalDatasetCount]; // iterate through all subsets represented by the bits of an int, 0 = not contained, 1 = contained - for (int coveredDatasetsBits = 0; coveredDatasetsBits < 1 << totalDatasetCount /* = 2^{datasetCount} - */; coveredDatasetsBits++) { + int subsetCount = 1 << totalDatasetCount; // = 2^{datasetCount} + for (int coveredDatasetsBits = 0; coveredDatasetsBits < subsetCount; coveredDatasetsBits++) { int coveredDatasetsCount = Integer.bitCount(coveredDatasetsBits); if (coveredDatasetsCount >= 2) { - // get array of numbers of covered datasets + // get array of covered datasets ids int[] coveredDatasets = new int[coveredDatasetsCount]; - int i = 0; - for (int dataset = 0; dataset < totalDatasetCount; dataset++) { + for (int dataset = 0, i = 0; dataset < totalDatasetCount; dataset++) { if ((coveredDatasetsBits & (1 << dataset /* = 2^{dataset} */)) != 0) { coveredDatasets[i++] = dataset; } @@ -140,7 +148,8 @@ private Stream>[] generateCasesStream(double[] overlapShare, int // calculate number of cases with covered datasets int cases = (int) (overlapShare[coveredDatasetsCount] * totalCases); // generate stream of cases with covered datasets - casesStreams[coveredDatasetsBits] = Stream.generate(new CorrespondenceGroupSupplier(coveredDatasets, nextId)).limit(cases); + casesStreams[coveredDatasetsBits] = Stream.generate(new CorrespondenceGroupSupplier(coveredDatasets, + nextId)).limit(cases); } else { // empty stream for combinations without correspondences casesStreams[coveredDatasetsBits] = Stream.empty(); @@ -163,7 +172,8 @@ public List get() { List resources = Arrays.asList(new Resource[coveredDatasets.length]); for (int i = 0; i < coveredDatasets.length; i++) { int coveredDataset = coveredDatasets[i]; - resources.set(i, ResourceFactory.createResource(Integer.toString(nextId[coveredDataset]++ + coveredDataset * populationSize))); + resources.set(i, + ResourceFactory.createResource(Integer.toString(nextId[coveredDataset]++ + coveredDataset * populationSize))); } return resources; } diff --git a/src/test/java/de/uni_jena/cs/fusion/abecto/processor/ComparisonBenchmarkDataSupplierTest.java b/src/test/java/de/uni_jena/cs/fusion/abecto/processor/ComparisonBenchmarkDataSupplierTest.java new file mode 100644 index 0000000..442b1d1 --- /dev/null +++ b/src/test/java/de/uni_jena/cs/fusion/abecto/processor/ComparisonBenchmarkDataSupplierTest.java @@ -0,0 +1,108 @@ +/** + * Copyright © 2019 Heinz Nixdorf Chair for Distributed Information Systems, Friedrich Schiller University Jena + * (http://www.fusion.uni-jena.de/) + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on + * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ +package de.uni_jena.cs.fusion.abecto.processor; + +import org.apache.jena.rdf.model.Resource; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +public class ComparisonBenchmarkDataSupplierTest { + + @ParameterizedTest(name = "[{index}] Data generation for {0} datasources.") + @ValueSource(ints = {2, 3, 4, 5, 6, 7, 8, 9, 10}) + public void supplierTest(int datasetCount) { + int populationSize = 1000; + double coverage = 0.75, errorRate = 0.1; + ComparisonBenchmarkDataSupplier supplier = new ComparisonBenchmarkDataSupplier(populationSize, datasetCount, + coverage, errorRate); + + // check dataset count + List dataset = new ArrayList<>(supplier.getDatasets()); + Assertions.assertEquals(datasetCount, dataset.size()); + + // check population size + Set[] population = new Set[datasetCount]; + for (int i = 0; i < datasetCount; i++) { + population[i] = supplier.getResourceKeys(dataset.get(i)).collect(Collectors.toSet()); + Assertions.assertEquals(populationSize, population[i].size()); + } + + // check values + for (int i = 0; i < datasetCount; i++) { + int rightValuesSingle = 0, wrongValuesSingle = 0, rightValuesCollection = 0, wrongValuesCollection = 0; + for (Resource resource : population[i]) { + // single resource method + Assertions.assertEquals(1, supplier.selectResourceValues(resource, dataset.get(i), + Collections.singletonList("var")).get("var").size()); + if (supplier.selectResourceValues(resource, dataset.get(i), Collections.singletonList("var")).get( + "var").contains(ComparisonBenchmarkDataSupplier.correctValue)) { + rightValuesSingle++; + } else { + wrongValuesSingle++; + } + + // resource collection method + Assertions.assertEquals(1, supplier.selectResourceValues(Collections.singletonList(resource), + dataset.get(i), Collections.singletonList("var")).get(resource).size()); + Assertions.assertEquals(1, supplier.selectResourceValues(Collections.singletonList(resource), + dataset.get(i), Collections.singletonList("var")).get(resource).get("var").size()); + if (supplier.selectResourceValues(Collections.singletonList(resource), dataset.get(i), + Collections.singletonList("var")).get(resource).get("var").contains(ComparisonBenchmarkDataSupplier.correctValue)) { + rightValuesCollection++; + } else { + wrongValuesCollection++; + } + } + Assertions.assertEquals((int) (populationSize * errorRate), wrongValuesSingle); + Assertions.assertEquals((int) (populationSize * errorRate), wrongValuesCollection); + Assertions.assertEquals(populationSize - (int) (populationSize * errorRate), rightValuesSingle); + Assertions.assertEquals(populationSize - (int) (populationSize * errorRate), rightValuesCollection); + } + + + // pairwise check coverage + int[][] pairs = new int[datasetCount-1][datasetCount]; + supplier.getCorrespondenceGroups().forEach(resources -> { + for (int i = 0; i < datasetCount; i++) { + if (resources.stream().anyMatch(population[i]::contains)) { + for (int j = i + 1; j < datasetCount; j++) { + if (resources.stream().anyMatch(population[j]::contains)) { + pairs[i][j]++; + } + } + } + } + }); + int expectedCoverageMax = (int) (populationSize * coverage); + // allow underrun by 1 per overlap case the pair is involved (2^{n-2}) and per right / wrong value case (x2) + int expectedCoverageMin = (int) (expectedCoverageMax - (Math.pow(2, datasetCount - 2) * 2)); + int exampleCoverage = pairs[0][1]; + for (int i = 0; i < datasetCount; i++) { + for (int j = i + 1; j < datasetCount; j++) { + // check for equal coverage of all pairs + Assertions.assertEquals(exampleCoverage,pairs[i][j]); + //check for a range to accept underrun due to rounding + Assertions.assertTrue(expectedCoverageMin <= pairs[i][j] && pairs[i][j] <= expectedCoverageMax, + String.format("%s <= %s <= %s", expectedCoverageMin, pairs[i][j], expectedCoverageMax)); + } + } + } +} diff --git a/src/test/java/de/uni_jena/cs/fusion/abecto/processor/PopulationComparisonProcessorBenchmark.java b/src/test/java/de/uni_jena/cs/fusion/abecto/processor/PopulationComparisonProcessorBenchmark.java index 8b43ec4..483b577 100644 --- a/src/test/java/de/uni_jena/cs/fusion/abecto/processor/PopulationComparisonProcessorBenchmark.java +++ b/src/test/java/de/uni_jena/cs/fusion/abecto/processor/PopulationComparisonProcessorBenchmark.java @@ -1,3 +1,18 @@ +/** + * Copyright © 2019 Heinz Nixdorf Chair for Distributed Information Systems, Friedrich Schiller University Jena (http://www.fusion.uni-jena.de/) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package de.uni_jena.cs.fusion.abecto.processor; import de.uni_jena.cs.fusion.abecto.Aspect; @@ -9,9 +24,9 @@ public class PopulationComparisonProcessorBenchmark { - private class IndependentPopulationComparisonProcessor extends PopulationComparisonProcessor { + private static class IndependentPopulationComparisonProcessor extends PopulationComparisonProcessor { - private ComparisonBenchmarkDataSupplier dataSupplier; + private final ComparisonBenchmarkDataSupplier dataSupplier; public IndependentPopulationComparisonProcessor(int populationSize, int datasetCount, double coverage) { this.dataSupplier = new ComparisonBenchmarkDataSupplier(populationSize, datasetCount, coverage, 0); @@ -24,7 +39,7 @@ public Set getDatasets() { @Override public Stream getResourceKeys(Aspect aspect, Resource dataset) throws NullPointerException { - return this.dataSupplier.getResourceKeys(aspect, dataset); + return this.dataSupplier.getResourceKeys(dataset); } public Stream> getCorrespondenceGroups() { diff --git a/src/test/java/de/uni_jena/cs/fusion/abecto/processor/PropertyComparisonProcessorBenchmark.java b/src/test/java/de/uni_jena/cs/fusion/abecto/processor/PropertyComparisonProcessorBenchmark.java index cfb8600..d667a86 100644 --- a/src/test/java/de/uni_jena/cs/fusion/abecto/processor/PropertyComparisonProcessorBenchmark.java +++ b/src/test/java/de/uni_jena/cs/fusion/abecto/processor/PropertyComparisonProcessorBenchmark.java @@ -1,3 +1,18 @@ +/** + * Copyright © 2019 Heinz Nixdorf Chair for Distributed Information Systems, Friedrich Schiller University Jena (http://www.fusion.uni-jena.de/) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package de.uni_jena.cs.fusion.abecto.processor; import de.uni_jena.cs.fusion.abecto.Aspect; @@ -9,9 +24,9 @@ public class PropertyComparisonProcessorBenchmark { - private class IndependentPropertyComparisonProcessor extends PropertyComparisonProcessor { + private static class IndependentPropertyComparisonProcessor extends PropertyComparisonProcessor { - private ComparisonBenchmarkDataSupplier dataSupplier; + private final ComparisonBenchmarkDataSupplier dataSupplier; public IndependentPropertyComparisonProcessor(int populationSize, int datasetCount, double coverage) { this.dataSupplier = new ComparisonBenchmarkDataSupplier(populationSize, datasetCount, coverage, 0); @@ -23,21 +38,21 @@ public Set getDatasets() { } @Override - public Stream getResourceKeys(Aspect aspect, Resource dataset) throws NullPointerException { - return this.dataSupplier.getResourceKeys(aspect, dataset); + public Stream getResourceKeys(@SuppressWarnings("unused") Aspect aspect, Resource dataset) throws NullPointerException { + return this.dataSupplier.getResourceKeys(dataset); } public Map> selectResourceValues(Resource resource, Resource dataset, @SuppressWarnings("unused") Aspect aspect, Collection variables) { - return this.dataSupplier.selectResourceValues(resource, dataset, aspect, variables); + return this.dataSupplier.selectResourceValues(resource, dataset, variables); } public Map>> selectResourceValues(Collection resources, Resource dataset, @SuppressWarnings("unused") Aspect aspect, List variables) { - return this.dataSupplier.selectResourceValues(resources, dataset, aspect, variables); + return this.dataSupplier.selectResourceValues(resources, dataset, variables); }