Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementation of radix trie #153

Merged
merged 9 commits into from
Jan 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion build.gradle.kts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import net.ltgt.gradle.errorprone.errorprone
import org.gradle.nativeplatform.platform.internal.DefaultNativePlatform
import org.sonarqube.gradle.SonarTask

plugins {
Expand Down Expand Up @@ -174,14 +175,22 @@ tasks {
"RedundantOverride",
"RedundantThrows",
"RemoveUnusedImports",
"DefaultCharset",
"UnnecessarilyFullyQualified",
"UnnecessarilyUsedValue",
"UnnecessaryBoxedAssignment",
"UnnecessaryBoxedVariable",
"UnnecessaryFinal",
"UnusedException",
"UnusedLabel",
Breus marked this conversation as resolved.
Show resolved Hide resolved
"UnusedMethod",
"UnusedNestedClass",
"UnusedVariable",
"WildcardImport",
)
if (DefaultNativePlatform.getCurrentOperatingSystem().isWindows) {
disable("MisleadingEscapedSpace") // good stuff
Breus marked this conversation as resolved.
Show resolved Hide resolved
}
disable(
"StringCaseLocaleUsage",
"MissingSummary",
Expand All @@ -208,4 +217,4 @@ tasks {
withType<SonarTask> {
dependsOn(jacocoTestReport)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ public int countBytes(State state) {

@Benchmark
public void writeFile(State state) throws IOException {
try (FileWriter fileWriter = new FileWriter("file.json")) {
try (FileWriter fileWriter = new FileWriter("file.json", StandardCharsets.UTF_8)) {
fileWriter.write(state.jsonString);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ public synchronized void setup() throws IOException {
json = BenchmarkUtils.randomJson(targetKeys, jsonSize, "unicode", 0.1).getBytes(StandardCharsets.UTF_8);

// prepare an input file for FileStreams
try (FileWriter inputFileWriter = new FileWriter(INPUT_FILE_STREAM_NAME)) {
try (FileWriter inputFileWriter = new FileWriter(INPUT_FILE_STREAM_NAME, StandardCharsets.UTF_8)) {
inputFileWriter.write(new String(json, StandardCharsets.UTF_8));
inputFileWriter.flush();
}
Expand Down
117 changes: 117 additions & 0 deletions src/main/java/dev/blaauwendraad/masker/json/JsonPathTracker.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
package dev.blaauwendraad.masker.json;

import org.jspecify.annotations.Nullable;

import java.util.ArrayDeque;

/**
* Tracks the currently matched JSONPath segments in the radix trie during masking.
*/
class JsonPathTracker {
/**
* A node representing {@code null} to satisfy {@link ArrayDeque}, which does not support it.
*/
private static final KeyMatcher.RadixTriePointer NULL_NODE = new KeyMatcher.RadixTriePointer(new KeyMatcher.RadixTrieNode(new byte[0], new byte[0]), 0);

private final KeyMatcher keyMatcher;
/**
* Stack of the segments that reflects the JSON nesting level during matching. If the current JSON key has been
* matched against the current JSONPath segment, the {@link KeyMatcher.RadixTriePointer} node is added on top of the
* stack. If the current JSON key didn't match the current JSONPath segment, the {@link JsonPathTracker#NULL_NODE}
* is added instead to keep track of the nesting level.
*/
private final ArrayDeque<KeyMatcher.RadixTriePointer> jsonPathSegments = new ArrayDeque<>();

JsonPathTracker(KeyMatcher keyMatcher) {
this.keyMatcher = keyMatcher;
var root = keyMatcher.getRootNode();
// The first character is always the '$' character, which is essentially skipped here.
if (!root.descent((byte) '$')) {
throw new IllegalStateException("JSONPath root node is null");
}
Breus marked this conversation as resolved.
Show resolved Hide resolved
this.jsonPathSegments.push(root.checkpoint());
root.reset();
}

/**
* Expands the current tracked JSONPath with an array segment.
*/
void pushArraySegment() {
jsonPathSegments.push(getWildcardNodeOrNullNode());
}

/**
* Expands the current tracked JSONPath with a value segment.
*/
void pushKeyValueSegment(byte[] bytes, int keyOffset, int keyLength) {
jsonPathSegments.push(getKeyValueNodeOrNullNode(bytes, keyOffset, keyLength));
}

/**
* Backtracks the current tracked JSONPath to the previous segment.
*/
void backtrack() {
jsonPathSegments.pop();
}

/**
* Traverse the trie node when entering an array. In order to match the array it has to be a wildcard.
* <p>For example:
* For a JSON like this {@code { "holder": [ { "maskMe": "secret" } } } the matching JSONPath has to be
* {@code '$.holder.*.maskMe'}, so that entering the array requires a wildcard node.
*/
private KeyMatcher.RadixTriePointer getWildcardNodeOrNullNode() {
var current = currentNode();
if (current == null) {
return NULL_NODE;
}
try {
if (!current.descent((byte) '.')) {
return NULL_NODE;
}
if (current.isJsonPathWildcard()) {
current.descent((byte) '*');
return current.checkpoint();
}
return NULL_NODE;
} finally {
current.reset();
}
}

/**
* Traverse the trie node when entering a key-value. The matching can be done for the matching key, or through a
* wildcard ('*') JSONPath.
*/
private KeyMatcher.RadixTriePointer getKeyValueNodeOrNullNode(byte[] bytes, int keyOffset, int keyLength) {
var current = currentNode();
if (current == null) {
return NULL_NODE;
}
try {
if (!current.descent((byte) '.')) {
return NULL_NODE;
}
if (current.isJsonPathWildcard()) {
current.descent((byte) '*');
return current.checkpoint();
} else {
var child = keyMatcher.traverseFrom(current, bytes, keyOffset, keyLength);
if (child != null) {
return child.checkpoint();
}
}
return NULL_NODE;
} finally {
current.reset();
}
}

KeyMatcher.@Nullable RadixTriePointer currentNode() {
var peek = jsonPathSegments.peek();
if (peek == NULL_NODE) {
return null;
}
return peek;
}
}
69 changes: 43 additions & 26 deletions src/main/java/dev/blaauwendraad/masker/json/KeyContainsMasker.java
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,18 @@ public void mask(InputStream inputStream, OutputStream outputStream) {
private void mask(MaskingState maskingState) {
try {
KeyMaskingConfig keyMaskingConfig = maskingConfig.isInAllowMode() ? maskingConfig.getDefaultConfig() : null;
if (maskingState.jsonPathEnabled()) {
maskingState.expandCurrentJsonPath(keyMatcher.getJsonPathRootNode());
keyMaskingConfig = keyMatcher.getMaskConfigIfMatched(maskingState.getMessage(), -1, -1, maskingState.getCurrentJsonPathNode());

JsonPathTracker jsonPathTracker;
if (!maskingConfig.getTargetJsonPaths().isEmpty()) {
jsonPathTracker = new JsonPathTracker(keyMatcher);
keyMaskingConfig = keyMatcher.getMaskConfigIfMatched(maskingState.getMessage(), -1, -1, jsonPathTracker.currentNode());
} else {
jsonPathTracker = null;
}

while (!maskingState.endOfJson()) {
stepOverWhitespaceCharacters(maskingState);
if (!visitValue(maskingState, keyMaskingConfig)) {
if (!visitValue(maskingState, jsonPathTracker, keyMaskingConfig)) {
maskingState.next();
}
}
Expand All @@ -86,19 +90,19 @@ private void mask(MaskingState maskingState) {
* Entrypoint of visiting any value (object, array or primitive) in the JSON.
*
* @param maskingState the current masking state
* @param jsonPathTracker the current {@link JsonPathTracker}
* @param keyMaskingConfig if not null it means that the current value is being masked otherwise the value is not
* being masked
*
* @return whether a value was found, if returned false the calling code must advance to avoid infinite loops
*/
private boolean visitValue(MaskingState maskingState, @Nullable KeyMaskingConfig keyMaskingConfig) {
private boolean visitValue(MaskingState maskingState, @Nullable JsonPathTracker jsonPathTracker, @Nullable KeyMaskingConfig keyMaskingConfig) {
if (maskingState.endOfJson()) {
return true;
}
// using switch-case over 'if'-statements to improve performance by ~20% (measured in benchmarks)
switch (maskingState.byteAtCurrentIndex()) {
case '[' -> visitArray(maskingState, keyMaskingConfig);
case '{' -> visitObject(maskingState, keyMaskingConfig);
case '[' -> visitArray(maskingState, jsonPathTracker, keyMaskingConfig);
case '{' -> visitObject(maskingState, jsonPathTracker, keyMaskingConfig);
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' -> {
if (keyMaskingConfig != null) {
maskNumber(maskingState, keyMaskingConfig);
Expand Down Expand Up @@ -136,23 +140,26 @@ private boolean visitValue(MaskingState maskingState, @Nullable KeyMaskingConfig
}

/**
* Visits an array of unknown values (or empty) and invokes {@link #visitValue(MaskingState, KeyMaskingConfig)} on
* each element while propagating the {@link KeyMaskingConfig}.
* Visits an array of unknown values (or empty) and invokes {@link #visitValue(MaskingState, JsonPathTracker, KeyMaskingConfig)}
* on each element while propagating the {@link KeyMaskingConfig}.
*
* @param maskingState the current {@link MaskingState}
* @param keyMaskingConfig if not null it means that the current value is being masked according to the
* @param jsonPathTracker the current {@link JsonPathTracker}
* @param keyMaskingConfig if not {@code null}, it means that the current value is being masked according to the
* {@link KeyMaskingConfig}. Otherwise, the value is not masked
*/
private void visitArray(MaskingState maskingState, @Nullable KeyMaskingConfig keyMaskingConfig) {
maskingState.expandCurrentJsonPath(keyMatcher.traverseJsonPathSegment(maskingState.getMessage(), maskingState.getCurrentJsonPathNode(), -1, -1));
private void visitArray(MaskingState maskingState, @Nullable JsonPathTracker jsonPathTracker, @Nullable KeyMaskingConfig keyMaskingConfig) {
if (jsonPathTracker != null) {
jsonPathTracker.pushArraySegment();
}
while (maskingState.next()) {
stepOverWhitespaceCharacters(maskingState);
// check if we're in an empty array
if (maskingState.byteAtCurrentIndex() == ']') {
break;
}

visitValue(maskingState, keyMaskingConfig);
visitValue(maskingState, jsonPathTracker, keyMaskingConfig);

stepOverWhitespaceCharacters(maskingState);
// check if we're at the end of a (non-empty) array
Expand All @@ -161,22 +168,25 @@ private void visitArray(MaskingState maskingState, @Nullable KeyMaskingConfig ke
}
}
maskingState.next(); // step over array closing square bracket
maskingState.backtrackCurrentJsonPath();
if (jsonPathTracker != null) {
jsonPathTracker.backtrack();
}
}

/**
* Visits an object, iterates over the keys and checks whether key needs to be masked (if
* {@link JsonMaskingConfig.TargetKeyMode#MASK}) or allowed (if {@link JsonMaskingConfig.TargetKeyMode#ALLOW}). For
* each value, invokes {@link #visitValue(MaskingState, KeyMaskingConfig)} with a non-null {@link KeyMaskingConfig}
* each value, invokes {@link #visitValue(MaskingState, JsonPathTracker, KeyMaskingConfig)} with a non-null {@link KeyMaskingConfig}
* (when key needs to be masked) or {@code null} (when key is allowed). Whenever 'parentKeyMaskingConfig' is
* supplied, it means that the object with all its keys is being masked. The only situation when the individual
* values do not need to be masked is when the key is explicitly allowed (in allow mode).
*
* @param maskingState the current {@link MaskingState}
* @param jsonPathTracker the current {@link JsonPathTracker}
* @param parentKeyMaskingConfig if not null it means that the current value is being masked according to the
* {@link KeyMaskingConfig}. Otherwise, the value is not being masked
*/
private void visitObject(MaskingState maskingState, @Nullable KeyMaskingConfig parentKeyMaskingConfig) {
private void visitObject(MaskingState maskingState, @Nullable JsonPathTracker jsonPathTracker, @Nullable KeyMaskingConfig parentKeyMaskingConfig) {
while (maskingState.next()) {
stepOverWhitespaceCharacters(maskingState);
// check if we're in an empty object
Expand All @@ -188,12 +198,17 @@ private void visitObject(MaskingState maskingState, @Nullable KeyMaskingConfig p

stepOverStringValue(maskingState);

int keyStartIndex = maskingState.getCurrentTokenStartIndex();
int afterClosingQuoteIndex = maskingState.currentIndex();
int keyLength = afterClosingQuoteIndex - keyStartIndex - 2; // minus the opening and closing quotes
maskingState.expandCurrentJsonPath(keyMatcher.traverseJsonPathSegment(maskingState.getMessage(), maskingState.getCurrentJsonPathNode(), keyStartIndex + 1, keyLength));
KeyMaskingConfig keyMaskingConfig = keyMatcher.getMaskConfigIfMatched(maskingState.getMessage(), keyStartIndex + 1, // plus one for the opening quote
keyLength, maskingState.getCurrentJsonPathNode());
int keyStartIndex = maskingState.getCurrentTokenStartIndex() + 1; // plus the opening quote
int keyLength = maskingState.currentIndex() - keyStartIndex - 1; // minus the closing quote
KeyMaskingConfig keyMaskingConfig;
if (jsonPathTracker != null) {
jsonPathTracker.pushKeyValueSegment(maskingState.getMessage(), keyStartIndex, keyLength);
keyMaskingConfig = keyMatcher.getMaskConfigIfMatched(maskingState.getMessage(), keyStartIndex, keyLength, jsonPathTracker.currentNode());
} else {
keyMaskingConfig = keyMatcher.getMaskConfigIfMatched(maskingState.getMessage(), keyStartIndex, keyLength, null);
}


maskingState.clearTokenStartIndex();
stepOverWhitespaceCharacters(maskingState);
// step over the colon ':'
Expand All @@ -208,15 +223,17 @@ private void visitObject(MaskingState maskingState, @Nullable KeyMaskingConfig p
} else {
// this is where it might get confusing - this method is called when the whole object is being masked
// if we got a maskingConfig for the key - we need to mask this key with that config. However, if the config
// we got was the default config, then it means that the key doesn't have a specific configuration and
// we got was the default config, then it means that the key doesn't have a specific configuration, and
// we should fall back to key specific config that the object is being masked with.
// E.g.: '{ "a": { "b": "value" } }' we want to use config of 'b' if any, but fallback to config of 'a'
if (parentKeyMaskingConfig != null && (keyMaskingConfig == null || keyMaskingConfig == maskingConfig.getDefaultConfig())) {
keyMaskingConfig = parentKeyMaskingConfig;
}
visitValue(maskingState, keyMaskingConfig);
visitValue(maskingState, jsonPathTracker, keyMaskingConfig);
}
if (jsonPathTracker != null) {
jsonPathTracker.backtrack();
}
maskingState.backtrackCurrentJsonPath();

stepOverWhitespaceCharacters(maskingState);
// check if we're at the end of a (non-empty) object
Expand Down
Loading
Loading