Skip to content

Commit

Permalink
test for comparison processor benchmark data generation
Browse files Browse the repository at this point in the history
  • Loading branch information
jmkeil committed Nov 1, 2023
1 parent c535ef9 commit 3f91db8
Show file tree
Hide file tree
Showing 5 changed files with 202 additions and 33 deletions.
21 changes: 21 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,13 @@
<version>${junit.jupiter.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<!-- EPL 2.0 -->
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-params</artifactId>
<version>${junit.jupiter.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<!-- Apache 2.0 -->
<groupId>com.github.tomakehurst</groupId>
Expand All @@ -134,6 +141,20 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<!-- GPL 2 with classpath exception -->
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-core</artifactId>
<version>1.36</version>
<scope>test</scope>
</dependency>
<dependency>
<!-- GPL 2 with classpath exception -->
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-generator-annprocess</artifactId>
<version>1.36</version>
<scope>test</scope>
</dependency>
<dependency>
<!-- Apache 2.0 -->
<groupId>org.apache.logging.log4j</groupId>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,21 @@
/**
* Copyright © 2019 Heinz Nixdorf Chair for Distributed Information Systems, Friedrich Schiller University Jena (http://www.fusion.uni-jena.de/)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.uni_jena.cs.fusion.abecto.processor;

import com.google.common.collect.Streams;
import de.uni_jena.cs.fusion.abecto.Aspect;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.ResourceFactory;
Expand All @@ -12,20 +26,19 @@
import java.util.stream.IntStream;
import java.util.stream.Stream;

public class ComparisonBenchmarkDataSupplier extends PopulationComparisonProcessor {
public class ComparisonBenchmarkDataSupplier {

protected static final RDFNode correctValue = ResourceFactory.createTypedLiteral(-1);
private final int populationSize, totalDatasetCount;
private final double coverage, errorRate;
private final RDFNode correctValue;
private final RDFNode[] wrongValues;

public ComparisonBenchmarkDataSupplier(int populationSize, int datasetCount, double coverage, double errorRate) {
this.populationSize = populationSize;
this.totalDatasetCount = datasetCount;
this.coverage = coverage;
this.errorRate = errorRate;
// generate correct and wrong values per dataset
this.correctValue = ResourceFactory.createTypedLiteral(-1);
// generate wrong values per dataset
// wrongValues[0] should not be used and exists to improve code readability by avoiding -1 shifts later on
this.wrongValues = new RDFNode[datasetCount + 1];
for (int i = 0; i <= datasetCount; i++) {
Expand All @@ -37,19 +50,17 @@ public Set<Resource> getDatasets() {
return IntStream.range(0, totalDatasetCount).mapToObj(i -> ResourceFactory.createResource(Integer.toString(i))).collect(Collectors.toSet());
}

public Stream<Resource> getResourceKeys(@SuppressWarnings("unused") Aspect aspect, Resource dataset) throws NullPointerException {
public Stream<Resource> getResourceKeys(Resource dataset) throws NullPointerException {
int datasetNumber = Integer.parseInt(dataset.getURI());
return IntStream.range(0, populationSize).map(i -> i + datasetNumber * populationSize).mapToObj(i -> ResourceFactory.createResource(Integer.toString(i)));
}

public Map<String, Set<RDFNode>> selectResourceValues(Resource resource, Resource dataset,
@SuppressWarnings("unused") Aspect aspect,
Collection<String> variables) {

int resourceNumber = Integer.parseInt(resource.getURI());
if (resourceNumber % populationSize >= populationSize * errorRate) {
return Collections.singletonMap(variables.iterator().next(),
Collections.singleton(this.correctValue));
return Collections.singletonMap(variables.iterator().next(), Collections.singleton(this.correctValue));
} else {
int datasetNumber = Integer.parseInt(dataset.getURI());
return Collections.singletonMap(variables.iterator().next(),
Expand All @@ -58,8 +69,7 @@ public Map<String, Set<RDFNode>> selectResourceValues(Resource resource, Resourc
}

public Map<Resource, Map<String, Set<RDFNode>>> selectResourceValues(Collection<Resource> resources,
Resource dataset,
@SuppressWarnings("unused") Aspect aspect, List<String> variables) {
Resource dataset, List<String> variables) {
int datasetNumber = Integer.parseInt(dataset.getURI());

Map<Resource, Map<String, Set<RDFNode>>> resourceValues = new HashMap<>();
Expand All @@ -68,7 +78,7 @@ public Map<Resource, Map<String, Set<RDFNode>>> selectResourceValues(Collection<
int resourceNumber = Integer.parseInt(resource.getURI());
if (resourceNumber % populationSize >= populationSize * errorRate) {
resourceValues.put(resource, Collections.singletonMap(variables.iterator().next(),
Collections.singleton(this.correctValue)));
Collections.singleton(correctValue)));
} else {
resourceValues.put(resource, Collections.singletonMap(variables.iterator().next(),
Collections.singleton(this.wrongValues[datasetNumber])));
Expand All @@ -78,7 +88,8 @@ public Map<Resource, Map<String, Set<RDFNode>>> selectResourceValues(Collection<
}

private double overlapShare(int overlappingDatasetCount, int totalDatasetCount, double coverage) {
return Math.pow(coverage, overlappingDatasetCount - 1) * Math.pow(1 - coverage, totalDatasetCount - overlappingDatasetCount);
return Math.pow(coverage, overlappingDatasetCount - 1) * Math.pow(1 - coverage,
totalDatasetCount - overlappingDatasetCount);
}


Expand All @@ -97,8 +108,7 @@ public Stream<List<Resource>> getCorrespondenceGroups() {
(int) (populationSize * errorRate), (int) (populationSize * (1 - errorRate)));

// join cases streams
@SuppressWarnings("unchecked")
Stream<List<Resource>>[] casesStreams = new Stream[(1 << totalDatasetCount) * 2];
@SuppressWarnings("unchecked") Stream<List<Resource>>[] casesStreams = new Stream[(1 << totalDatasetCount) * 2];
System.arraycopy(errorCasesStreams, 0, casesStreams, 0, 1 << totalDatasetCount);
System.arraycopy(correctCasesStreams, 0, casesStreams, 1 << totalDatasetCount, 1 << totalDatasetCount);
return Streams.concat(casesStreams);
Expand All @@ -119,19 +129,17 @@ private Stream<List<Resource>>[] generateCasesStream(double[] overlapShare, int
int[] nextId = new int[totalDatasetCount];
Arrays.fill(nextId, idOffset);

@SuppressWarnings("unchecked")
Stream<List<Resource>>[] casesStreams = new Stream[1 << totalDatasetCount];
@SuppressWarnings("unchecked") Stream<List<Resource>>[] casesStreams = new Stream[1 << totalDatasetCount];
// iterate through all subsets represented by the bits of an int, 0 = not contained, 1 = contained
for (int coveredDatasetsBits = 0; coveredDatasetsBits < 1 << totalDatasetCount /* = 2^{datasetCount}
*/; coveredDatasetsBits++) {
int subsetCount = 1 << totalDatasetCount; // = 2^{datasetCount}
for (int coveredDatasetsBits = 0; coveredDatasetsBits < subsetCount; coveredDatasetsBits++) {
int coveredDatasetsCount = Integer.bitCount(coveredDatasetsBits);


if (coveredDatasetsCount >= 2) {
// get array of numbers of covered datasets
// get array of covered datasets ids
int[] coveredDatasets = new int[coveredDatasetsCount];
int i = 0;
for (int dataset = 0; dataset < totalDatasetCount; dataset++) {
for (int dataset = 0, i = 0; dataset < totalDatasetCount; dataset++) {
if ((coveredDatasetsBits & (1 << dataset /* = 2^{dataset} */)) != 0) {
coveredDatasets[i++] = dataset;
}
Expand All @@ -140,7 +148,8 @@ private Stream<List<Resource>>[] generateCasesStream(double[] overlapShare, int
// calculate number of cases with covered datasets
int cases = (int) (overlapShare[coveredDatasetsCount] * totalCases);
// generate stream of cases with covered datasets
casesStreams[coveredDatasetsBits] = Stream.generate(new CorrespondenceGroupSupplier(coveredDatasets, nextId)).limit(cases);
casesStreams[coveredDatasetsBits] = Stream.generate(new CorrespondenceGroupSupplier(coveredDatasets,
nextId)).limit(cases);
} else {
// empty stream for combinations without correspondences
casesStreams[coveredDatasetsBits] = Stream.empty();
Expand All @@ -163,7 +172,8 @@ public List<Resource> get() {
List<Resource> resources = Arrays.asList(new Resource[coveredDatasets.length]);
for (int i = 0; i < coveredDatasets.length; i++) {
int coveredDataset = coveredDatasets[i];
resources.set(i, ResourceFactory.createResource(Integer.toString(nextId[coveredDataset]++ + coveredDataset * populationSize)));
resources.set(i,
ResourceFactory.createResource(Integer.toString(nextId[coveredDataset]++ + coveredDataset * populationSize)));
}
return resources;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/**
* Copyright © 2019 Heinz Nixdorf Chair for Distributed Information Systems, Friedrich Schiller University Jena
* (http://www.fusion.uni-jena.de/)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/
package de.uni_jena.cs.fusion.abecto.processor;

import org.apache.jena.rdf.model.Resource;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Tests the synthetic benchmark data produced by {@link ComparisonBenchmarkDataSupplier}.
 */
public class ComparisonBenchmarkDataSupplierTest {

    /**
     * Checks, for the given number of datasets, that the supplier provides
     * <ul>
     * <li>the requested number of datasets,</li>
     * <li>the requested population size per dataset,</li>
     * <li>exactly one value per resource and variable, with the expected number of correct and wrong values
     * for both the single resource and the resource collection variant of {@code selectResourceValues}, and</li>
     * <li>an equal pairwise coverage for all dataset pairs that lies within the accepted rounding range.</li>
     * </ul>
     *
     * @param datasetCount number of datasets the supplier is asked to generate
     */
    @ParameterizedTest(name = "[{index}] Data generation for {0} datasources.")
    @ValueSource(ints = {2, 3, 4, 5, 6, 7, 8, 9, 10})
    public void supplierTest(int datasetCount) {
        int populationSize = 1000;
        double coverage = 0.75, errorRate = 0.1;
        ComparisonBenchmarkDataSupplier supplier = new ComparisonBenchmarkDataSupplier(populationSize, datasetCount,
                coverage, errorRate);
        List<String> variables = Collections.singletonList("var");

        // check dataset count
        List<Resource> datasets = new ArrayList<>(supplier.getDatasets());
        Assertions.assertEquals(datasetCount, datasets.size());

        // check population size
        // (a list is used instead of a generic array to avoid an unchecked raw array creation)
        List<Set<Resource>> populations = new ArrayList<>(datasetCount);
        for (Resource dataset : datasets) {
            Set<Resource> population = supplier.getResourceKeys(dataset).collect(Collectors.toSet());
            Assertions.assertEquals(populationSize, population.size());
            populations.add(population);
        }

        // check values
        int expectedWrongValues = (int) (populationSize * errorRate);
        int expectedRightValues = populationSize - expectedWrongValues;
        for (int i = 0; i < datasetCount; i++) {
            Resource dataset = datasets.get(i);
            int rightValuesSingle = 0, wrongValuesSingle = 0, rightValuesCollection = 0, wrongValuesCollection = 0;
            for (Resource resource : populations.get(i)) {
                // single resource method: query once and reuse the result for all checks
                Set<?> singleValues = supplier.selectResourceValues(resource, dataset, variables).get("var");
                Assertions.assertEquals(1, singleValues.size());
                if (singleValues.contains(ComparisonBenchmarkDataSupplier.correctValue)) {
                    rightValuesSingle++;
                } else {
                    wrongValuesSingle++;
                }

                // resource collection method: query once and reuse the result for all checks
                Map<String, ? extends Set<?>> valuesByVariable = supplier.selectResourceValues(
                        Collections.singletonList(resource), dataset, variables).get(resource);
                Assertions.assertEquals(1, valuesByVariable.size());
                Set<?> collectionValues = valuesByVariable.get("var");
                Assertions.assertEquals(1, collectionValues.size());
                if (collectionValues.contains(ComparisonBenchmarkDataSupplier.correctValue)) {
                    rightValuesCollection++;
                } else {
                    wrongValuesCollection++;
                }
            }
            Assertions.assertEquals(expectedWrongValues, wrongValuesSingle);
            Assertions.assertEquals(expectedWrongValues, wrongValuesCollection);
            Assertions.assertEquals(expectedRightValues, rightValuesSingle);
            Assertions.assertEquals(expectedRightValues, rightValuesCollection);
        }

        // pairwise check coverage
        // pairs[i][j] counts the correspondence groups containing resources of both dataset i and dataset j (i < j);
        // the last dataset never occurs as first index, hence only datasetCount - 1 rows are needed
        int[][] pairs = new int[datasetCount - 1][datasetCount];
        supplier.getCorrespondenceGroups().forEach(resources -> {
            for (int i = 0; i < datasetCount; i++) {
                if (resources.stream().anyMatch(populations.get(i)::contains)) {
                    for (int j = i + 1; j < datasetCount; j++) {
                        if (resources.stream().anyMatch(populations.get(j)::contains)) {
                            pairs[i][j]++;
                        }
                    }
                }
            }
        });
        int expectedCoverageMax = (int) (populationSize * coverage);
        // allow an underrun of 1 for each overlap case the pair is involved in (2^{n-2} cases),
        // once for the right value cases and once for the wrong value cases (x2)
        int expectedCoverageMin = (int) (expectedCoverageMax - (Math.pow(2, datasetCount - 2) * 2));
        int exampleCoverage = pairs[0][1];
        for (int i = 0; i < datasetCount; i++) {
            for (int j = i + 1; j < datasetCount; j++) {
                int pairCoverage = pairs[i][j];
                // check for equal coverage of all pairs
                Assertions.assertEquals(exampleCoverage, pairCoverage);
                // check for a range to accept underrun due to rounding
                Assertions.assertTrue(expectedCoverageMin <= pairCoverage && pairCoverage <= expectedCoverageMax,
                        String.format("%s <= %s <= %s", expectedCoverageMin, pairCoverage, expectedCoverageMax));
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
/**
* Copyright © 2019 Heinz Nixdorf Chair for Distributed Information Systems, Friedrich Schiller University Jena (http://www.fusion.uni-jena.de/)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.uni_jena.cs.fusion.abecto.processor;

import de.uni_jena.cs.fusion.abecto.Aspect;
Expand All @@ -9,9 +24,9 @@

public class PopulationComparisonProcessorBenchmark {

private class IndependentPopulationComparisonProcessor extends PopulationComparisonProcessor {
private static class IndependentPopulationComparisonProcessor extends PopulationComparisonProcessor {

private ComparisonBenchmarkDataSupplier dataSupplier;
private final ComparisonBenchmarkDataSupplier dataSupplier;

public IndependentPopulationComparisonProcessor(int populationSize, int datasetCount, double coverage) {
this.dataSupplier = new ComparisonBenchmarkDataSupplier(populationSize, datasetCount, coverage, 0);
Expand All @@ -24,7 +39,7 @@ public Set<Resource> getDatasets() {

@Override
public Stream<Resource> getResourceKeys(Aspect aspect, Resource dataset) throws NullPointerException {
return this.dataSupplier.getResourceKeys(aspect, dataset);
return this.dataSupplier.getResourceKeys(dataset);
}

public Stream<List<Resource>> getCorrespondenceGroups() {
Expand Down
Loading

0 comments on commit 3f91db8

Please sign in to comment.