Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reverse distribution #2050

Merged
merged 3 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 16 additions & 5 deletions core/src/main/java/de/jplag/JPlagResult.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package de.jplag;

import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.function.ToDoubleFunction;
Expand Down Expand Up @@ -98,8 +99,8 @@ public JPlagOptions getOptions() {

/**
* For the {@link SimilarityMetric} JPlag was run with, this returns the similarity distribution of detected matches in
* a 10-element array. Each entry represents the absolute frequency of matches whose similarity lies within the
* respective interval. Intervals: 0: [0% - 10%), 1: [10% - 20%), 2: [20% - 30%), ..., 9: [90% - 100%]
* a 100-element array. Each entry represents the absolute frequency of matches whose similarity lies within the
* respective interval. Intervals: 0: [0% - 1%), 1: [1% - 2%), 2: [2% - 3%), ..., 99: [99% - 100%]
* @return the similarity distribution array.
*/
public int[] getSimilarityDistribution() {
Expand All @@ -108,9 +109,9 @@ public int[] getSimilarityDistribution() {

/**
* For the {@link SimilarityMetric#MAX} that is built in to every {@link JPlagComparison}, this returns the similarity
* distribution of detected matches in a 10-element array. Each entry represents the absolute frequency of matches whose
* similarity lies within the respective interval. Intervals: 0: [0% - 10%), 1: [10% - 20%), 2: [20% - 30%), ..., 9:
* [90% - 100%]
* distribution of detected matches in a 100-element array. Each entry represents the absolute frequency of matches
* whose similarity lies within the respective interval. Intervals: 0: [0% - 1%), 1: [1% - 2%), 2: [2% - 3%), ..., 99:
* [99% - 100%]
* @return the similarity distribution array. When JPlag was run with the {@link SimilarityMetric#MAX}, this will return
* the same distribution as {@link JPlagResult#getSimilarityDistribution()}
*/
Expand All @@ -122,6 +123,16 @@ public List<ClusteringResult<Submission>> getClusteringResult() {
return this.clusteringResult;
}

/**
 * Computes the similarity distribution over all comparisons of this result for the given metric. The outcome is
 * returned as a list of 100 boxed counts, where index i covers the similarity range [i%, (i+1)%) and the final index
 * 99 covers [99%, 100%].
 * @param similarityMetric Metric used to extract the similarity of each comparison
 * @return the similarity distribution as a list
 */
public List<Integer> calculateDistributionFor(ToDoubleFunction<JPlagComparison> similarityMetric) {
    // Delegate to the array-based overload, then box the counts for list-based consumers.
    int[] frequencies = calculateDistributionFor(this.comparisons, similarityMetric);
    return Arrays.stream(frequencies).boxed().toList();
}

@Override
public String toString() {
return String.format("JPlagResult { comparisons: %d, duration: %d ms, language: %s, submissions: %d }", getAllComparisons().size(),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
package de.jplag.reporting.reportobject.mapper;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
Expand All @@ -29,8 +27,11 @@ public MetricMapper(Function<Submission, String> submissionToIdFunction) {
* @return Map with key as name of metric and value as distribution
*/
public static Map<String, List<Integer>> getDistributions(JPlagResult result) {
return Map.of(SimilarityMetric.AVG.name(), convertDistribution(result.getSimilarityDistribution()), SimilarityMetric.MAX.name(),
convertDistribution(result.getMaxSimilarityDistribution()));
Map<String, List<Integer>> distributions = new HashMap<>();
for (SimilarityMetric metric : SimilarityMetric.values()) {
distributions.put(metric.name(), result.calculateDistributionFor(metric));
}
return distributions;
}

/**
Expand All @@ -48,10 +49,4 @@ public List<TopComparison> getTopComparisons(JPlagResult result) {
private Map<String, Double> getComparisonMetricMap(JPlagComparison comparison) {
    // Keys are the metric names; values are the matching similarity of the given comparison.
    String averageKey = SimilarityMetric.AVG.name();
    String maximumKey = SimilarityMetric.MAX.name();
    return Map.of(averageKey, comparison.similarity(), maximumKey, comparison.maximalSimilarity());
}

private static List<Integer> convertDistribution(int[] array) {
List<Integer> list = new ArrayList<>(Arrays.stream(array).boxed().toList());
Collections.reverse(list);
return list;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import static org.mockito.Mockito.mock;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

Expand All @@ -16,6 +16,7 @@
import de.jplag.JPlagResult;
import de.jplag.Submission;
import de.jplag.options.JPlagOptions;
import de.jplag.options.SimilarityMetric;
import de.jplag.reporting.reportobject.model.TopComparison;

public class MetricMapperTest {
Expand All @@ -39,7 +40,8 @@ public void test_getDistributions() {
Map<String, List<Integer>> result = MetricMapper.getDistributions(jPlagResult);

// then
Assertions.assertEquals(Map.of("AVG", EXPECTED_AVG_DISTRIBUTION, "MAX", EXPECTED_MAX_DISTRIBUTION), result);
Assertions.assertEquals(EXPECTED_AVG_DISTRIBUTION, result.get("AVG"));
Assertions.assertEquals(EXPECTED_MAX_DISTRIBUTION, result.get("MAX"));
}

@Test
Expand All @@ -58,9 +60,8 @@ public void test_getTopComparisons() {
}

private int[] distribution(List<Integer> expectedDistribution) {
var reversedDistribution = new ArrayList<>(expectedDistribution);
Collections.reverse(reversedDistribution);
return reversedDistribution.stream().mapToInt(Integer::intValue).toArray();
var distribution = new ArrayList<>(expectedDistribution);
return distribution.stream().mapToInt(Integer::intValue).toArray();
}

private CreateSubmission submission(String name) {
Expand All @@ -73,8 +74,8 @@ private Comparison comparison(CreateSubmission submission1, CreateSubmission sub

private JPlagResult createJPlagResult(int[] avgDistribution, int[] maxDistribution, Comparison... createComparisonsDto) {
JPlagResult jPlagResult = mock(JPlagResult.class);
doReturn(avgDistribution).when(jPlagResult).getSimilarityDistribution();
doReturn(maxDistribution).when(jPlagResult).getMaxSimilarityDistribution();
doReturn(Arrays.stream(avgDistribution).boxed().toList()).when(jPlagResult).calculateDistributionFor(SimilarityMetric.AVG);
doReturn(Arrays.stream(maxDistribution).boxed().toList()).when(jPlagResult).calculateDistributionFor(SimilarityMetric.MAX);

JPlagOptions options = mock(JPlagOptions.class);
doReturn(createComparisonsDto.length).when(options).maximumNumberOfComparisons();
Expand Down
3 changes: 2 additions & 1 deletion report-viewer/src/model/Distribution.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@ export class Distribution {
public splitIntoBuckets(bucketCount: BucketOptions): number[] {
const bucketArray = new Array<number>(bucketCount).fill(0)
const divisor = 100 / bucketCount
const reversedDistribution = Array.from(this._distribution).reverse()
for (let i = 99; i >= 0; i--) {
bucketArray[Math.floor(i / divisor)] += this._distribution[i]
bucketArray[Math.floor(i / divisor)] += reversedDistribution[i]
}
return bucketArray
}
Expand Down
9 changes: 5 additions & 4 deletions report-viewer/tests/unit/model/Distribution.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ import { describe, expect, it } from 'vitest'
import { Distribution } from '@/model/Distribution'

const distributionData = [
0, 7, 15, 42, 109, 225, 470, 869, 1442, 2052, 3025, 4056, 5091, 6130, 7023, 7292, 7445, 7177,
6343, 5373, 4309, 3163, 2244, 1544, 923, 493, 273, 168, 61, 31, 8, 12, 2, 1, 0, 1, 2, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0,
1, 2, 12, 8, 31, 61, 168, 273, 493, 923, 1544, 2244, 3163, 4309, 5373, 6343, 7177, 7445, 7292,
7023, 6130, 5091, 4056, 3025, 2052, 1442, 869, 470, 225, 109, 42, 15, 7, 0
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
]
const distribution = new Distribution(distributionData)

Expand All @@ -14,7 +14,8 @@ describe('Distribution', () => {
expect(distribution.splitIntoBuckets(10)).toEqual([0, 0, 0, 0, 0, 0, 26, 13209, 58955, 5231])
}),
it('get in 100 Buckets', () => {
expect(distribution.splitIntoBuckets(100)).toEqual(distributionData)
const reversedOriginal = Array.from(distributionData).reverse()
expect(distribution.splitIntoBuckets(100)).toEqual(reversedOriginal)
}),
it('get in 25 Buckets', () => {
expect(distribution.splitIntoBuckets(25)).toEqual([
Expand Down
Loading