Skip to content

Commit

Permalink
Merge pull request #2050 from jplag/reverse-distribution
Browse files Browse the repository at this point in the history
Reverse distribution
  • Loading branch information
tsaglam authored Nov 6, 2024
2 parents 91c14f4 + 911ef5a commit 27cd733
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 28 deletions.
21 changes: 16 additions & 5 deletions core/src/main/java/de/jplag/JPlagResult.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package de.jplag;

import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.function.ToDoubleFunction;
Expand Down Expand Up @@ -98,8 +99,8 @@ public JPlagOptions getOptions() {

/**
* For the {@link SimilarityMetric} JPlag was run with, this returns the similarity distribution of detected matches in
* a 10-element array. Each entry represents the absolute frequency of matches whose similarity lies within the
* respective interval. Intervals: 0: [0% - 10%), 1: [10% - 20%), 2: [20% - 30%), ..., 9: [90% - 100%]
* a 100-element array. Each entry represents the absolute frequency of matches whose similarity lies within the
* respective interval. Intervals: 0: [0% - 1%), 1: [1% - 2%), 2: [2% - 3%), ..., 99: [99% - 100%]
* @return the similarity distribution array.
*/
public int[] getSimilarityDistribution() {
Expand All @@ -108,9 +109,9 @@ public int[] getSimilarityDistribution() {

/**
* For the {@link SimilarityMetric#MAX} that is built in to every {@link JPlagComparison}, this returns the similarity
* distribution of detected matches in a 10-element array. Each entry represents the absolute frequency of matches whose
* similarity lies within the respective interval. Intervals: 0: [0% - 10%), 1: [10% - 20%), 2: [20% - 30%), ..., 9:
* [90% - 100%]
* distribution of detected matches in a 100-element array. Each entry represents the absolute frequency of matches
* whose similarity lies within the respective interval. Intervals: 0: [0% - 1%), 1: [1% - 2%), 2: [2% - 3%), ..., 99:
* [99% - 100%]
* @return the similarity distribution array. When JPlag was run with the {@link SimilarityMetric#MAX}, this will return
* the same distribution as {@link JPlagResult#getSimilarityDistribution()}
*/
Expand All @@ -122,6 +123,16 @@ public List<ClusteringResult<Submission>> getClusteringResult() {
return this.clusteringResult;
}

/**
 * Computes the similarity distribution over all comparisons of this result for the given metric and returns it as a
 * list of boxed per-bucket counts. The list has 100 entries, indexed by similarity range: 0: [0%, 1%), 1: [1%, 2%),
 * ..., 99: [99%, 100%]
 * @param similarityMetric Metric used to obtain the similarity value of each comparison
 * @return the similarity distribution as a list of absolute frequencies
 */
public List<Integer> calculateDistributionFor(ToDoubleFunction<JPlagComparison> similarityMetric) {
    // Delegate the bucket counting to the array-based overload, then box the counts for callers that need a List.
    int[] bucketCounts = calculateDistributionFor(this.comparisons, similarityMetric);
    return Arrays.stream(bucketCounts).boxed().toList();
}

@Override
public String toString() {
return String.format("JPlagResult { comparisons: %d, duration: %d ms, language: %s, submissions: %d }", getAllComparisons().size(),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
package de.jplag.reporting.reportobject.mapper;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
Expand All @@ -29,8 +27,11 @@ public MetricMapper(Function<Submission, String> submissionToIdFunction) {
* @return Map with key as name of metric and value as distribution
*/
public static Map<String, List<Integer>> getDistributions(JPlagResult result) {
return Map.of(SimilarityMetric.AVG.name(), convertDistribution(result.getSimilarityDistribution()), SimilarityMetric.MAX.name(),
convertDistribution(result.getMaxSimilarityDistribution()));
Map<String, List<Integer>> distributions = new HashMap<>();
for (SimilarityMetric metric : SimilarityMetric.values()) {
distributions.put(metric.name(), result.calculateDistributionFor(metric));
}
return distributions;
}

/**
Expand All @@ -48,10 +49,4 @@ public List<TopComparison> getTopComparisons(JPlagResult result) {
/**
 * Builds the per-comparison metric map: the name of {@link SimilarityMetric#AVG} is mapped to the comparison's
 * similarity and the name of {@link SimilarityMetric#MAX} to its maximal similarity.
 * @param comparison the comparison to read the similarity values from
 * @return immutable map from metric name to similarity value
 */
private Map<String, Double> getComparisonMetricMap(JPlagComparison comparison) {
    String averageKey = SimilarityMetric.AVG.name();
    String maximumKey = SimilarityMetric.MAX.name();
    return Map.of(averageKey, comparison.similarity(), maximumKey, comparison.maximalSimilarity());
}

private static List<Integer> convertDistribution(int[] array) {
List<Integer> list = new ArrayList<>(Arrays.stream(array).boxed().toList());
Collections.reverse(list);
return list;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import static org.mockito.Mockito.mock;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

Expand All @@ -16,6 +16,7 @@
import de.jplag.JPlagResult;
import de.jplag.Submission;
import de.jplag.options.JPlagOptions;
import de.jplag.options.SimilarityMetric;
import de.jplag.reporting.reportobject.model.TopComparison;

public class MetricMapperTest {
Expand All @@ -39,7 +40,8 @@ public void test_getDistributions() {
Map<String, List<Integer>> result = MetricMapper.getDistributions(jPlagResult);

// then
Assertions.assertEquals(Map.of("AVG", EXPECTED_AVG_DISTRIBUTION, "MAX", EXPECTED_MAX_DISTRIBUTION), result);
Assertions.assertEquals(EXPECTED_AVG_DISTRIBUTION, result.get("AVG"));
Assertions.assertEquals(EXPECTED_MAX_DISTRIBUTION, result.get("MAX"));
}

@Test
Expand All @@ -58,9 +60,8 @@ public void test_getTopComparisons() {
}

private int[] distribution(List<Integer> expectedDistribution) {
var reversedDistribution = new ArrayList<>(expectedDistribution);
Collections.reverse(reversedDistribution);
return reversedDistribution.stream().mapToInt(Integer::intValue).toArray();
var distribution = new ArrayList<>(expectedDistribution);
return distribution.stream().mapToInt(Integer::intValue).toArray();
}

private CreateSubmission submission(String name) {
Expand All @@ -73,8 +74,8 @@ private Comparison comparison(CreateSubmission submission1, CreateSubmission sub

private JPlagResult createJPlagResult(int[] avgDistribution, int[] maxDistribution, Comparison... createComparisonsDto) {
JPlagResult jPlagResult = mock(JPlagResult.class);
doReturn(avgDistribution).when(jPlagResult).getSimilarityDistribution();
doReturn(maxDistribution).when(jPlagResult).getMaxSimilarityDistribution();
doReturn(Arrays.stream(avgDistribution).boxed().toList()).when(jPlagResult).calculateDistributionFor(SimilarityMetric.AVG);
doReturn(Arrays.stream(maxDistribution).boxed().toList()).when(jPlagResult).calculateDistributionFor(SimilarityMetric.MAX);

JPlagOptions options = mock(JPlagOptions.class);
doReturn(createComparisonsDto.length).when(options).maximumNumberOfComparisons();
Expand Down
3 changes: 2 additions & 1 deletion report-viewer/src/model/Distribution.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@ export class Distribution {
public splitIntoBuckets(bucketCount: BucketOptions): number[] {
const bucketArray = new Array<number>(bucketCount).fill(0)
const divisor = 100 / bucketCount
const reversedDistribution = Array.from(this._distribution).reverse()
for (let i = 99; i >= 0; i--) {
bucketArray[Math.floor(i / divisor)] += this._distribution[i]
bucketArray[Math.floor(i / divisor)] += reversedDistribution[i]
}
return bucketArray
}
Expand Down
9 changes: 5 additions & 4 deletions report-viewer/tests/unit/model/Distribution.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ import { describe, expect, it } from 'vitest'
import { Distribution } from '@/model/Distribution'

const distributionData = [
0, 7, 15, 42, 109, 225, 470, 869, 1442, 2052, 3025, 4056, 5091, 6130, 7023, 7292, 7445, 7177,
6343, 5373, 4309, 3163, 2244, 1544, 923, 493, 273, 168, 61, 31, 8, 12, 2, 1, 0, 1, 2, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0,
1, 2, 12, 8, 31, 61, 168, 273, 493, 923, 1544, 2244, 3163, 4309, 5373, 6343, 7177, 7445, 7292,
7023, 6130, 5091, 4056, 3025, 2052, 1442, 869, 470, 225, 109, 42, 15, 7, 0
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
]
const distribution = new Distribution(distributionData)

Expand All @@ -14,7 +14,8 @@ describe('Distribution', () => {
expect(distribution.splitIntoBuckets(10)).toEqual([0, 0, 0, 0, 0, 0, 26, 13209, 58955, 5231])
}),
it('get in 100 Buckets', () => {
expect(distribution.splitIntoBuckets(100)).toEqual(distributionData)
const reversedOriginal = Array.from(distributionData).reverse()
expect(distribution.splitIntoBuckets(100)).toEqual(reversedOriginal)
}),
it('get in 25 Buckets', () => {
expect(distribution.splitIntoBuckets(25)).toEqual([
Expand Down

0 comments on commit 27cd733

Please sign in to comment.