Skip to content

Commit

Permalink
Merge pull request #18 from aboutcode-org/add-test
Browse files Browse the repository at this point in the history
Create snippet test for similarity ratio
  • Loading branch information
JonoYang authored Dec 11, 2024
2 parents 7dcee68 + 32066cb commit 8e23645
Show file tree
Hide file tree
Showing 3 changed files with 210 additions and 0 deletions.
26 changes: 26 additions & 0 deletions tests/test_fingerprinting.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,3 +234,29 @@ def test_snippets_similarity(self, regen=False):
)
expected_results_loc = self.get_test_loc("snippet-similarity-expected.json")
check_against_expected_json_file(results, expected_results_loc, regen=regen)

def test_snippets_similarity_2(self, regen=False):
    """
    Check how many snippets ``index.js`` shares with ``index-modified.js``,
    which is ``index.js`` with a function removed.

    ``regen`` mirrors the signature of ``test_snippets_similarity`` but is
    not used by this test.
    """
    original_loc = self.get_test_loc("snippets/index.js")
    modified_loc = self.get_test_loc("snippets/index-modified.js")

    original_results = get_file_fingerprint_hashes(original_loc, include_ngrams=True)
    modified_results = get_file_fingerprint_hashes(modified_loc, include_ngrams=True)
    original_snippets = original_results.get("snippets")
    modified_snippets = modified_results.get("snippets")

    # Build one mapping per file, then keep only the keys present in both.
    original_mapping, modified_mapping = [
        self._create_snippet_mappings_by_snippets(snippets)
        for snippets in (original_snippets, modified_snippets)
    ]
    shared_snippets = original_mapping.keys() & modified_mapping.keys()

    # Similarity ratio: matches divided by the mean of the two snippet counts,
    # i.e. 2 * m / (n1 + n2). Note that this is the Sorensen-Dice form, not
    # the Jaccard form |A & B| / |A | B|.
    mean_snippet_count = (len(original_snippets) + len(modified_snippets)) / 2
    similarity_ratio = len(shared_snippets) / mean_snippet_count

    assert similarity_ratio == 0.9666666666666667
    assert len(original_snippets) == 61
    assert len(modified_snippets) == 59
    assert len(shared_snippets) == 58
90 changes: 90 additions & 0 deletions tests/testfiles/fingerprinting/snippets/index-modified.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
var pSlice = Array.prototype.slice;
var objectKeys = require('./lib/keys.js');
var isArguments = require('./lib/is_arguments.js');

/**
 * Decide whether `actual` and `expected` are equivalent. The same function is
 * also assigned to `module.exports`.
 *
 * Branches are tried in order: strict identity (===), two Date instances
 * compared through getTime(), the falsy/non-object branch (=== when
 * `opts.strict` is truthy, == otherwise), and finally objEquiv() for
 * everything else.
 *
 * @param {*} actual - first value to compare
 * @param {*} expected - second value to compare
 * @param {Object} [opts] - options; replaced by {} when falsy. Only
 *   `opts.strict` is read here; the object is passed on to objEquiv().
 * @returns {boolean} result of the first branch that applies
 */
var deepEqual = module.exports = function (actual, expected, opts) {
if (!opts) opts = {};
// 7.1. All identical values are equivalent, as determined by ===.
if (actual === expected) {
return true;

} else if (actual instanceof Date && expected instanceof Date) {
return actual.getTime() === expected.getTime();

// 7.3. Other pairs that do not both pass typeof value == 'object',
// equivalence is determined by ==.
// NOTE: && binds tighter than ||, so the condition below reads as
// "either value is falsy, or neither value has typeof 'object'".
} else if (!actual || !expected || typeof actual != 'object' && typeof expected != 'object') {
return opts.strict ? actual === expected : actual == expected;

// 7.4. For all other Object pairs, including Array objects, equivalence is
// determined by having the same number of owned properties (as verified
// with Object.prototype.hasOwnProperty.call), the same set of keys
// (although not necessarily the same order), equivalent values for every
// corresponding key, and an identical 'prototype' property. Note: this
// accounts for both named and indexed properties on Arrays.
} else {
return objEquiv(actual, expected, opts);
}
}

/**
 * Duck-type check for a buffer-like value.
 *
 * Returns true only when `x` is a truthy value with typeof 'object', a
 * numeric `length`, function-valued `copy` and `slice` properties and, when
 * `length` is greater than 0, a numeric first element.
 *
 * @param {*} x - value to inspect
 * @returns {boolean} true when every check passes, false otherwise
 */
function isBuffer (x) {
if (!x || typeof x !== 'object' || typeof x.length !== 'number') return false;
if (typeof x.copy !== 'function' || typeof x.slice !== 'function') {
return false;
}
// Only the first element is sampled; an empty value skips this check.
if (x.length > 0 && typeof x[0] !== 'number') return false;
return true;
}

/**
 * Structural comparison that deepEqual() falls back to when none of its
 * earlier branches apply.
 *
 * Relies on isUndefinedOrNull, isArguments, isBuffer, objectKeys, pSlice and
 * deepEqual being in scope. Steps, in order:
 *   1. false when either value is null or undefined;
 *   2. false when the `prototype` properties differ (!==);
 *   3. when isArguments(a) is truthy, both values are copied with pSlice and
 *      compared again through deepEqual();
 *   4. when isBuffer(a) is truthy, lengths and then elements are compared
 *      with !==;
 *   5. otherwise the key lists from objectKeys() are compared by length,
 *      sorted, compared pairwise with !=, and each keyed value is compared
 *      with deepEqual().
 *
 * @param {*} a - first value
 * @param {*} b - second value
 * @param {Object} opts - passed through unchanged to deepEqual()
 * @returns {boolean} false at the first mismatch; otherwise the result of
 *   step 3 or 4, or `typeof a === typeof b` after step 5
 */
function objEquiv(a, b, opts) {
var i, key;
if (isUndefinedOrNull(a) || isUndefinedOrNull(b))
return false;
// an identical 'prototype' property.
if (a.prototype !== b.prototype) return false;
//~~~I've managed to break Object.keys through screwy arguments passing.
// Converting to array solves the problem.
if (isArguments(a)) {
if (!isArguments(b)) {
return false;
}
a = pSlice.call(a);
b = pSlice.call(b);
return deepEqual(a, b, opts);
}
if (isBuffer(a)) {
if (!isBuffer(b)) {
return false;
}
if (a.length !== b.length) return false;
for (i = 0; i < a.length; i++) {
if (a[i] !== b[i]) return false;
}
return true;
}
// `var` is function-scoped, so ka and kb stay visible after the try block.
try {
var ka = objectKeys(a),
kb = objectKeys(b);
} catch (e) {//happens when one is a string literal and the other isn't
return false;
}
// having the same number of owned properties (keys incorporates
// hasOwnProperty)
if (ka.length != kb.length)
return false;
//the same set of keys (although not necessarily the same order),
// Both lists are sorted in place so they can be compared index by index.
ka.sort();
kb.sort();
//~~~cheap key test
for (i = ka.length - 1; i >= 0; i--) {
if (ka[i] != kb[i])
return false;
}
//equivalent values for every corresponding key, and
//~~~possibly expensive deep test
for (i = ka.length - 1; i >= 0; i--) {
key = ka[i];
if (!deepEqual(a[key], b[key], opts)) return false;
}
return typeof a === typeof b;
}
94 changes: 94 additions & 0 deletions tests/testfiles/fingerprinting/snippets/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
var pSlice = Array.prototype.slice;
var objectKeys = require('./lib/keys.js');
var isArguments = require('./lib/is_arguments.js');

/**
 * Decide whether `actual` and `expected` are equivalent. The same function is
 * also assigned to `module.exports`.
 *
 * Branches are tried in order: strict identity (===), two Date instances
 * compared through getTime(), the falsy/non-object branch (=== when
 * `opts.strict` is truthy, == otherwise), and finally objEquiv() for
 * everything else.
 *
 * @param {*} actual - first value to compare
 * @param {*} expected - second value to compare
 * @param {Object} [opts] - options; replaced by {} when falsy. Only
 *   `opts.strict` is read here; the object is passed on to objEquiv().
 * @returns {boolean} result of the first branch that applies
 */
var deepEqual = module.exports = function (actual, expected, opts) {
if (!opts) opts = {};
// 7.1. All identical values are equivalent, as determined by ===.
if (actual === expected) {
return true;

} else if (actual instanceof Date && expected instanceof Date) {
return actual.getTime() === expected.getTime();

// 7.3. Other pairs that do not both pass typeof value == 'object',
// equivalence is determined by ==.
// NOTE: && binds tighter than ||, so the condition below reads as
// "either value is falsy, or neither value has typeof 'object'".
} else if (!actual || !expected || typeof actual != 'object' && typeof expected != 'object') {
return opts.strict ? actual === expected : actual == expected;

// 7.4. For all other Object pairs, including Array objects, equivalence is
// determined by having the same number of owned properties (as verified
// with Object.prototype.hasOwnProperty.call), the same set of keys
// (although not necessarily the same order), equivalent values for every
// corresponding key, and an identical 'prototype' property. Note: this
// accounts for both named and indexed properties on Arrays.
} else {
return objEquiv(actual, expected, opts);
}
}

/**
 * Tell whether `value` is exactly null or exactly undefined.
 *
 * @param {*} value - value to inspect
 * @returns {boolean} true for null or undefined, false for anything else
 */
function isUndefinedOrNull(value) {
return value === null || value === undefined;
}

/**
 * Duck-type check for a buffer-like value.
 *
 * Returns true only when `x` is a truthy value with typeof 'object', a
 * numeric `length`, function-valued `copy` and `slice` properties and, when
 * `length` is greater than 0, a numeric first element.
 *
 * @param {*} x - value to inspect
 * @returns {boolean} true when every check passes, false otherwise
 */
function isBuffer (x) {
if (!x || typeof x !== 'object' || typeof x.length !== 'number') return false;
if (typeof x.copy !== 'function' || typeof x.slice !== 'function') {
return false;
}
// Only the first element is sampled; an empty value skips this check.
if (x.length > 0 && typeof x[0] !== 'number') return false;
return true;
}

/**
 * Structural comparison that deepEqual() falls back to when none of its
 * earlier branches apply.
 *
 * Relies on isUndefinedOrNull, isArguments, isBuffer, objectKeys, pSlice and
 * deepEqual being in scope. Steps, in order:
 *   1. false when either value is null or undefined;
 *   2. false when the `prototype` properties differ (!==);
 *   3. when isArguments(a) is truthy, both values are copied with pSlice and
 *      compared again through deepEqual();
 *   4. when isBuffer(a) is truthy, lengths and then elements are compared
 *      with !==;
 *   5. otherwise the key lists from objectKeys() are compared by length,
 *      sorted, compared pairwise with !=, and each keyed value is compared
 *      with deepEqual().
 *
 * @param {*} a - first value
 * @param {*} b - second value
 * @param {Object} opts - passed through unchanged to deepEqual()
 * @returns {boolean} false at the first mismatch; otherwise the result of
 *   step 3 or 4, or `typeof a === typeof b` after step 5
 */
function objEquiv(a, b, opts) {
var i, key;
if (isUndefinedOrNull(a) || isUndefinedOrNull(b))
return false;
// an identical 'prototype' property.
if (a.prototype !== b.prototype) return false;
//~~~I've managed to break Object.keys through screwy arguments passing.
// Converting to array solves the problem.
if (isArguments(a)) {
if (!isArguments(b)) {
return false;
}
a = pSlice.call(a);
b = pSlice.call(b);
return deepEqual(a, b, opts);
}
if (isBuffer(a)) {
if (!isBuffer(b)) {
return false;
}
if (a.length !== b.length) return false;
for (i = 0; i < a.length; i++) {
if (a[i] !== b[i]) return false;
}
return true;
}
// `var` is function-scoped, so ka and kb stay visible after the try block.
try {
var ka = objectKeys(a),
kb = objectKeys(b);
} catch (e) {//happens when one is a string literal and the other isn't
return false;
}
// having the same number of owned properties (keys incorporates
// hasOwnProperty)
if (ka.length != kb.length)
return false;
//the same set of keys (although not necessarily the same order),
// Both lists are sorted in place so they can be compared index by index.
ka.sort();
kb.sort();
//~~~cheap key test
for (i = ka.length - 1; i >= 0; i--) {
if (ka[i] != kb[i])
return false;
}
//equivalent values for every corresponding key, and
//~~~possibly expensive deep test
for (i = ka.length - 1; i >= 0; i--) {
key = ka[i];
if (!deepEqual(a[key], b[key], opts)) return false;
}
return typeof a === typeof b;
}

0 comments on commit 8e23645

Please sign in to comment.