Skip to content

Commit

Permalink
refactor measures
Browse files Browse the repository at this point in the history
  • Loading branch information
jmkeil committed Oct 2, 2024
1 parent 749cad4 commit 6381829
Show file tree
Hide file tree
Showing 11 changed files with 178 additions and 112 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
import java.util.HashMap;
import java.util.Map;

public class AbsoluteCoverage extends Count<ResourcePair> {
public class AbsoluteCoverage extends LongMeasure<ResourcePair> {

public AbsoluteCoverage() {
super(AV.absoluteCoverage, OM.one);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,29 +23,12 @@
import java.math.BigDecimal;
import java.math.RoundingMode;

public abstract class Ratio<K> extends Measure<K, BigDecimal> {
public abstract class BigDecimalMeasure<K> extends Measure<K, BigDecimal> {

public final static int SCALE = 16;
public final static RoundingMode ROUNDING_MODE = RoundingMode.HALF_UP;

public Ratio(Resource quantity, Resource unit) {
public BigDecimalMeasure(Resource quantity, Resource unit) {
super(quantity, unit);
}

public void setRatioOf(Count<K> numerators, Count<K> denominators) {
for (K key : numerators.keySet()) {
if (denominators.contains(key)) {
BigDecimal numerator = BigDecimal.valueOf(numerators.get(key));
BigDecimal denominator = BigDecimal.valueOf(denominators.get(key));
set(key, numerator.divide(denominator, SCALE, ROUNDING_MODE));
}
}
}

public void setRatioOf(Count<K> numerators, BigDecimal denominator) {
for (K key : numerators.keySet()) {
BigDecimal numerator = BigDecimal.valueOf(numerators.get(key));
set(key, numerator.divide(denominator, SCALE, ROUNDING_MODE));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,13 @@
import java.math.RoundingMode;
import java.util.*;

public class Completeness extends Ratio<Resource> {
public class Completeness extends BigDecimalMeasure<Resource> {

public Completeness() {
super(AV.marCompletenessThomas08, OM.one);
}

public static Completeness calculate(AbsoluteCoverage absoluteCoverage, PerDatasetCount deduplicatedCount) {
public static Completeness calculate(AbsoluteCoverage absoluteCoverage, DeduplicatedCount deduplicatedCount) {
Set<ResourcePair> datasetPairs = getDatasetPairsWithSufficientData(absoluteCoverage, deduplicatedCount);
long totalPairwiseOverlap = calculateTotalPairwiseOverlap(datasetPairs, absoluteCoverage);
if (totalPairwiseOverlap != 0) {
Expand All @@ -47,7 +47,7 @@ public static Completeness calculate(AbsoluteCoverage absoluteCoverage, PerDatas
return new Completeness(); // empty
}

private static Set<ResourcePair> getDatasetPairsWithSufficientData(AbsoluteCoverage absoluteCoverage, PerDatasetCount deduplicatedCount) {
private static Set<ResourcePair> getDatasetPairsWithSufficientData(AbsoluteCoverage absoluteCoverage, DeduplicatedCount deduplicatedCount) {
Set<ResourcePair> datasetPairs = absoluteCoverage.keySet();
Set<Resource> datasetsWithDeduplicatedCount = deduplicatedCount.keySet();
return ResourcePair.getPairsBothContainedIn(datasetPairs, datasetsWithDeduplicatedCount);
Expand All @@ -63,7 +63,7 @@ private static long calculateTotalPairwiseOverlap(Iterable<ResourcePair> dataset
return totalPairwiseOverlap;
}

private static BigDecimal calculateEstimatedPopulationSize(Iterable<ResourcePair> datasetPairs, PerDatasetCount deduplicatedCount, long totalPairwiseOverlap) {
private static BigDecimal calculateEstimatedPopulationSize(Iterable<ResourcePair> datasetPairs, DeduplicatedCount deduplicatedCount, long totalPairwiseOverlap) {
BigDecimal estimatedPopulationSize = BigDecimal.ZERO;
for (ResourcePair datasetPair : datasetPairs) {
BigDecimal deduplicatedCount1 = BigDecimal.valueOf(deduplicatedCount.get(datasetPair.first));
Expand All @@ -75,7 +75,7 @@ private static BigDecimal calculateEstimatedPopulationSize(Iterable<ResourcePair
return estimatedPopulationSize;
}

private static Completeness calculateCompleteness(Iterable<Resource> datasets, PerDatasetCount deduplicatedCount, BigDecimal estimatedPopulationSize) {
private static Completeness calculateCompleteness(Iterable<Resource> datasets, DeduplicatedCount deduplicatedCount, BigDecimal estimatedPopulationSize) {
Completeness completeness = new Completeness();
for (Resource dataset : datasets) {
BigDecimal numerator = BigDecimal.valueOf(deduplicatedCount.get(dataset));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,38 +18,26 @@

package de.uni_jena.cs.fusion.abecto.measure;

import org.apache.jena.rdf.model.Resource;
import de.uni_jena.cs.fusion.abecto.vocabulary.AV;
import de.uni_jena.cs.fusion.abecto.vocabulary.OM;

public abstract class Count<K> extends Measure<K, Long> {
import java.util.HashMap;
import java.util.Map;

public Count(Resource quantity, Resource unit) {
super(quantity, unit);
}

public void setAllZero(Iterable<K> keys) {
for (K key: keys) {
setZero(key);
}
}

public void setZero(K key) {
values.put(key, 0L);
}

public void incrementByOrSetOne(K key) {
incrementByOrSet(key, 1L);
}
public class Count extends PerDatasetLongMeasure {

public void incrementByOrSet(K key, long increment) {
values.merge(key, increment, Long::sum);
public Count() {
super(AV.count, OM.one);
}

public void setDifferenceOf(Count<K> minuend, Count<K> subtrahend) {
for (K key : minuend.keySet()) {
if (subtrahend.contains(key)) {
set(key, minuend.get(key) - subtrahend.get(key));
}
public static Map<String, Count> createMapByVariable(Iterable<String> variables) {
Map<String, Count> mapOfCounts = new HashMap<>();
for (String variable : variables) {
Count countOfVariable = new Count();
countOfVariable.setVariable(variable);
mapOfCounts.put(variable, countOfVariable);
}
return mapOfCounts;
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*-
* Copyright © 2019-2022 Heinz Nixdorf Chair for Distributed Information Systems,
* Friedrich Schiller University Jena (http://www.fusion.uni-jena.de/)
* Copyright © 2023-2024 Jan Martin Keil ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
-*/

package de.uni_jena.cs.fusion.abecto.measure;

import de.uni_jena.cs.fusion.abecto.vocabulary.AV;
import de.uni_jena.cs.fusion.abecto.vocabulary.OM;

import java.util.HashMap;
import java.util.Map;

/**
 * Per-dataset measure registered under the {@code AV.deduplicatedCount}
 * quantity with the unit {@code OM.one}.
 */
public class DeduplicatedCount extends PerDatasetLongMeasure {

    /**
     * Creates an empty measure bound to {@code AV.deduplicatedCount} and
     * {@code OM.one}.
     */
    public DeduplicatedCount() {
        super(AV.deduplicatedCount, OM.one);
    }

    /**
     * Derives a deduplicated count as the difference {@code count - duplicateCount}.
     * The subtraction is delegated to {@code setDifferenceOf}.
     *
     * @param count          the minuend
     * @param duplicateCount the subtrahend
     * @return a new measure holding the differences
     */
    public static DeduplicatedCount calculate(Count count, DuplicateCount duplicateCount) {
        DeduplicatedCount difference = new DeduplicatedCount();
        difference.setDifferenceOf(count, duplicateCount);
        return difference;
    }

    /**
     * Builds one fresh measure per variable name and indexes it by that name.
     *
     * @param variables the variable names to create measures for
     * @return a mutable map from variable name to the measure created for it
     */
    public static Map<String, DeduplicatedCount> createMapByVariable(Iterable<String> variables) {
        Map<String, DeduplicatedCount> measuresByVariable = new HashMap<>();
        variables.forEach(name -> measuresByVariable.put(name, newForVariable(name)));
        return measuresByVariable;
    }

    /** Creates an empty measure and assigns the given variable name to it. */
    private static DeduplicatedCount newForVariable(String variable) {
        DeduplicatedCount measure = new DeduplicatedCount();
        measure.setVariable(variable);
        return measure;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*-
* Copyright © 2019-2022 Heinz Nixdorf Chair for Distributed Information Systems,
* Friedrich Schiller University Jena (http://www.fusion.uni-jena.de/)
* Copyright © 2023-2024 Jan Martin Keil ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
-*/

package de.uni_jena.cs.fusion.abecto.measure;

import de.uni_jena.cs.fusion.abecto.vocabulary.OM;

/**
 * Per-dataset measure with the unit {@code OM.one}. No quantity IRI has been
 * assigned yet, so the quantity passed to the superclass is {@code null}.
 */
public class DuplicateCount extends PerDatasetLongMeasure {

    /**
     * Creates an empty measure with a {@code null} quantity and the unit
     * {@code OM.one}.
     */
    public DuplicateCount() {
        super(null, OM.one); // TODO define measure IRI
    }

    /**
     * Derives a duplicate count as the difference {@code count - deduplicatedCount}.
     * The subtraction is delegated to {@code setDifferenceOf}.
     *
     * @param count             the minuend
     * @param deduplicatedCount the subtrahend
     * @return a new measure holding the differences
     */
    public static DuplicateCount calculate(Count count, DeduplicatedCount deduplicatedCount) {
        DuplicateCount difference = new DuplicateCount();
        difference.setDifferenceOf(count, deduplicatedCount);
        return difference;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*-
* Copyright © 2019-2022 Heinz Nixdorf Chair for Distributed Information Systems,
* Friedrich Schiller University Jena (http://www.fusion.uni-jena.de/)
* Copyright © 2023-2024 Jan Martin Keil ([email protected])
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
-*/

package de.uni_jena.cs.fusion.abecto.measure;

import org.apache.jena.rdf.model.Resource;

/**
 * Base class for measures that hold one {@code Long} value per key.
 *
 * @param <K> the type of the keys the values are stored under
 */
public abstract class LongMeasure<K> extends Measure<K, Long> {

    /**
     * Passes the quantity and unit through to the superclass constructor.
     *
     * @param quantity the quantity handed to {@code Measure}
     * @param unit     the unit handed to {@code Measure}
     */
    public LongMeasure(Resource quantity, Resource unit) {
        super(quantity, unit);
    }

    /**
     * Stores the value {@code 0} for the given key.
     *
     * @param key the key whose value is set to zero
     */
    public void setZero(K key) {
        long zero = 0L;
        values.put(key, zero);
    }

    /**
     * Same as {@link #incrementByOrSet(Object, long)} with an increment of one.
     *
     * @param key the key whose value is incremented or initialised
     */
    public void incrementByOrSetOne(K key) {
        long one = 1L;
        incrementByOrSet(key, one);
    }

    /**
     * Merges {@code increment} into the value of {@code key} by addition.
     * NOTE(review): presumably {@code values} is a {@code java.util.Map}, so an
     * absent key ends up mapped to {@code increment} — confirm against
     * {@code Measure}.
     *
     * @param key       the key whose value is incremented or initialised
     * @param increment the amount to add
     */
    public void incrementByOrSet(K key, long increment) {
        values.merge(key, increment, (current, added) -> current + added);
    }

    /**
     * For every key of {@code minuend} that {@code subtrahend} also contains,
     * sets this measure's value to the minuend value minus the subtrahend
     * value. Keys that the subtrahend does not contain are skipped.
     *
     * @param minuend    the measure to subtract from
     * @param subtrahend the measure to subtract
     */
    public void setDifferenceOf(LongMeasure<K> minuend, LongMeasure<K> subtrahend) {
        for (K key : minuend.keySet()) {
            if (!subtrahend.contains(key)) {
                continue;
            }
            long difference = minuend.get(key) - subtrahend.get(key);
            set(key, difference);
        }
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -23,25 +23,14 @@
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.Resource;

import java.util.HashMap;
import java.util.Map;

public class PerDatasetCount extends Count<Resource> {
public abstract class PerDatasetLongMeasure extends LongMeasure<Resource> {

public PerDatasetCount(Resource quantity, Resource unit) {
public PerDatasetLongMeasure(Resource quantity, Resource unit) {
super(quantity, unit);
}

public static Map<String, PerDatasetCount> createMapByVariable(Iterable<String> variables, Resource quantity, Resource unit) {
Map<String, PerDatasetCount> mapOfCounts = new HashMap<>();
for (String variable : variables) {
PerDatasetCount countOfVariable = new PerDatasetCount(quantity, unit);
countOfVariable.setVariable(variable);
mapOfCounts.put(variable, countOfVariable);
}
return mapOfCounts;
}

public void storeInModel(Aspect aspect, Map<Resource, Model> outputModelsMap) {
for (Resource dataset : keySet()) {
Metadata.addQualityMeasurement(quantity, get(dataset), unit, dataset, variable, aspect.getIri(), outputModelsMap.get(dataset));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@
import java.util.Map;
import java.util.Set;

public class RelativeCoverage extends Ratio<ResourceTupel> {
public class RelativeCoverage extends BigDecimalMeasure<ResourceTupel> {

public RelativeCoverage() {
super(AV.relativeCoverage, OM.one);
}

public static RelativeCoverage calculate(AbsoluteCoverage absoluteCoverage, PerDatasetCount deduplicatedCount) {
public static RelativeCoverage calculate(AbsoluteCoverage absoluteCoverage, DeduplicatedCount deduplicatedCount) {
RelativeCoverage relativeCoverage = new RelativeCoverage();
Set<ResourcePair> datasetPairs = getDatasetPairsWithSufficientData(absoluteCoverage, deduplicatedCount);
for (ResourcePair datasetPair : datasetPairs) {
Expand All @@ -48,13 +48,13 @@ public static RelativeCoverage calculate(AbsoluteCoverage absoluteCoverage, PerD
return relativeCoverage;
}

private static Set<ResourcePair> getDatasetPairsWithSufficientData(AbsoluteCoverage absoluteCoverage, PerDatasetCount deduplicatedCount) {
private static Set<ResourcePair> getDatasetPairsWithSufficientData(AbsoluteCoverage absoluteCoverage, DeduplicatedCount deduplicatedCount) {
Set<ResourcePair> datasetPairsWithAbsoluteCoverage = absoluteCoverage.keySet();
Set<Resource> datasetsWithDeduplicatedCount = deduplicatedCount.keySet();
return ResourcePair.getPairsBothContainedIn(datasetPairsWithAbsoluteCoverage, datasetsWithDeduplicatedCount);
}

void setRatioForTupel(BigDecimal numerator, PerDatasetCount denominators, Resource assessedDataset, Resource comparedDataset) {
void setRatioForTupel(BigDecimal numerator, DeduplicatedCount denominators, Resource assessedDataset, Resource comparedDataset) {
BigDecimal denominator = BigDecimal.valueOf(denominators.get(comparedDataset));
if (!denominator.equals(BigDecimal.ZERO)) {
BigDecimal value = numerator.divide(denominator, SCALE, ROUNDING_MODE);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

package de.uni_jena.cs.fusion.abecto.processor;

import java.math.BigDecimal;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
Expand All @@ -28,9 +27,6 @@
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.Resource;

import de.uni_jena.cs.fusion.abecto.vocabulary.AV;
import de.uni_jena.cs.fusion.abecto.vocabulary.OM;

/**
* Provides measurements for <strong>number of resources</strong>,
* <strong>absolute coverage</strong>, <strong>relative coverage</strong>, and
Expand All @@ -52,22 +48,11 @@ public class PopulationComparisonProcessor extends ComparisonProcessor<Populatio
Set<ResourcePair> datasetPairs;
Set<ResourceTupel> datasetTupels;
Map<Resource, Model> outputMetaModelByDataset;
/**
* Number of covered resources of another dataset, excluding duplicates.
*/

AbsoluteCoverage absoluteCoverage = new AbsoluteCoverage();
/**
* Number of resources in this dataset including duplicates.
*/
PerDatasetCount count = new PerDatasetCount(AV.count, OM.one);
/**
* Number of resource duplicates in this dataset.
*/
PerDatasetCount duplicateCount = new PerDatasetCount(null, OM.one);// TODO define measure IRI
/**
* Number of resources in this dataset excluding duplicates.
*/
PerDatasetCount deduplicatedCount = new PerDatasetCount(AV.deduplicatedCount, OM.one);
Count count = new Count();
DuplicateCount duplicateCount = new DuplicateCount();
DeduplicatedCount deduplicatedCount = new DeduplicatedCount();
RelativeCoverage relativeCoverage;
Completeness completeness;

Expand All @@ -88,7 +73,7 @@ void compareAspectPopulation(Aspect aspect) {

measureResourceCounts();
countAndReportCoverageAndDuplicatesAndOmissions(getCorrespondenceGroups());
calculateDeduplicatedCount();
deduplicatedCount = DeduplicatedCount.calculate(count, duplicateCount);
relativeCoverage = RelativeCoverage.calculate(absoluteCoverage, deduplicatedCount);
completeness = Completeness.calculate(absoluteCoverage, deduplicatedCount);

Expand Down Expand Up @@ -208,10 +193,6 @@ private void removeFromUnprocessedResources(Map<Resource, Set<Resource>> covered
}
}

private void calculateDeduplicatedCount() {
deduplicatedCount.setDifferenceOf(count, duplicateCount);
}

private void reportOmissionsOfUnprocessedResources() {
for (ResourcePair datasetPair : datasetPairs) {
reportOmissionsOfUnprocessedResourcesForResource(datasetPair.first, datasetPair.second);
Expand Down
Loading

0 comments on commit 6381829

Please sign in to comment.