Skip to content

Commit

Permalink
Add support for null value cells in PseudoField's process and preproc…
Browse files Browse the repository at this point in the history
…essor streams (#77)

* Add examples calls with cells containing null values

* Handle null values in process and preprocessor

* Add tests with null values in process and preprocessor

* Update example description (fnr -> value)

* Remove unused import 'io.micronaut.core.async.publisher.Publishers'
  • Loading branch information
Andilun authored Jan 9, 2024
1 parent 328e78f commit 01afcc7
Show file tree
Hide file tree
Showing 7 changed files with 178 additions and 25 deletions.
25 changes: 25 additions & 0 deletions doc/requests/examples-daead.http
Original file line number Diff line number Diff line change
Expand Up @@ -337,4 +337,29 @@ Content-Type: application/json
< ../../src/test/resources/data/person_3.json
--separator--

### Null value: Pseudonymize using DAEAD and default key
POST {{base_url}}/pseudonymize/file
Content-Type: multipart/form-data; boundary=separator
Authorization: Bearer {{keycloak_token}}

--separator
Content-Disposition: form-data; name="request"
Content-Type: application/json

{
"targetContentType": "application/json",
"pseudoConfig": {
"rules": [
{
"pattern": "**/{fnr,fornavn,etternavn}",
"func": "daead(keyId=ssb-common-key-1)"
}
]
}
}
--separator
Content-Disposition: form-data; name="data"; filename="null_fnr.json"
Content-Type: application/json

< ../../src/test/resources/data/null_fnr.json
--separator--
34 changes: 33 additions & 1 deletion doc/requests/examples-field.http
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,36 @@ Authorization: Bearer {{keycloak_token}}
],
"pseudoFunc": "map-sid(keyId=papis-key-1)"
}
}
}

### Null value: DAEAD with default keyset
POST {{base_url}}/pseudonymize/field
Content-Type: application/json
Authorization: Bearer {{keycloak_token}}

{
"request": {
"name": "empty",
"values": [
"20859374701",
null
]
}
}

### Null value: Map field to SID and pseudonymize SID values
POST {{base_url}}/pseudonymize/field
Content-Type: application/json
Authorization: Bearer {{keycloak_token}}

{
"request": {
"name": "fnr",
"values": [
"20859374701",
null
],
"pseudoFunc": "map-sid(keyId=papis-key-1)"
}
}

26 changes: 26 additions & 0 deletions doc/requests/examples-sid.http
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,29 @@ Content-Type: application/json
< ../../src/test/resources/data/person_3_sid_deid.json
--separator--

### Null value: Map to SID and pseudonymize SID values with papis-key
POST {{base_url}}/pseudonymize/file
Content-Type: multipart/form-data; boundary=separator
Authorization: Bearer {{keycloak_token}}

--separator
Content-Disposition: form-data; name="request"
Content-Type: application/json

{
"targetContentType": "application/json",
"pseudoConfig": {
"rules": [
{
"pattern": "**/{fnr,fornavn,etternavn}",
"func": "map-sid(keyId=papis-key-1)"
}
]
}
}
--separator
Content-Disposition: form-data; name="data"; filename="null_fnr.json"
Content-Type: application/json

< ../../src/test/resources/data/null_fnr.json
--separator--
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import io.micronaut.scheduling.TaskExecutors;
import io.micronaut.scheduling.annotation.ExecuteOn;
import io.micronaut.security.annotation.Secured;
import io.micronaut.security.rules.SecurityRule;
import io.reactivex.Completable;
import io.reactivex.Flowable;
import io.reactivex.Single;
Expand Down Expand Up @@ -73,13 +72,13 @@ public class PseudoController {
@Produces(MediaType.APPLICATION_JSON)
@Post(value = "/pseudonymize/field", consumes = MediaType.APPLICATION_JSON)
@ExecuteOn(TaskExecutors.IO)
public HttpResponse<Flowable> pseudonymizeField(@Schema(implementation = PseudoFieldRequest.class) String request) {
public HttpResponse<Flowable<List<Object>>> pseudonymizeField(@Schema(implementation = PseudoFieldRequest.class) String request) {
try {
PseudoFieldRequest req = Json.toObject(PseudoFieldRequest.class, request);
log.info(Strings.padEnd(String.format("*** Pseudonymize field: %s ", req.getName()), 80, '*'));
PseudoField pseudoField = new PseudoField(req.getName(), req.getPseudoFunc(), req.getKeyset());

MutableHttpResponse mutableHttpResponse = HttpResponse.ok(pseudoField.process(pseudoConfigSplitter, recordProcessorFactory,req.values));
MutableHttpResponse<Flowable<List<Object>>> mutableHttpResponse = HttpResponse.ok(pseudoField.process(pseudoConfigSplitter, recordProcessorFactory,req.values));

// Add metadata to header
mutableHttpResponse.getHeaders().add("metadata", pseudoField
Expand Down
41 changes: 23 additions & 18 deletions src/main/java/no/ssb/dlp/pseudo/service/pseudo/PseudoField.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
import no.ssb.dlp.pseudo.core.map.RecordMapProcessor;
import no.ssb.dlp.pseudo.core.tink.model.EncryptedKeysetWrapper;

import java.util.List;
import java.util.Map;
import java.util.*;

/**
* Represents a field to be pseudonymized.
Expand Down Expand Up @@ -55,29 +54,35 @@ public PseudoField(String name, String pseudoFunc, EncryptedKeysetWrapper keyset
* @param recordProcessorFactory The RecordMapProcessorFactory instance to use for creating a new PseudonymizeRecordProcessor.
* @return A Flowable stream that processes the field values by applying the configured pseudo rules, and returns them as buffered lists of strings (null input values are emitted as empty Optionals).
*/
public Flowable<List<String>> process(PseudoConfigSplitter pseudoConfigSplitter, RecordMapProcessorFactory recordProcessorFactory, List<String> values) {
public Flowable<List<Object>> process(PseudoConfigSplitter pseudoConfigSplitter, RecordMapProcessorFactory recordProcessorFactory, List<String> values) {
// The field's pseudo config may be split into several configs; all of them feed one record processor.
List<PseudoConfig> pseudoConfigs = pseudoConfigSplitter.splitIfNecessary(this.getPseudoConfig());

RecordMapProcessor recordMapProcessor = recordProcessorFactory.newPseudonymizeRecordProcessor(pseudoConfigs);
// The preprocessing step is chained in front of the value stream via andThen(...) below,
// so it has to complete before any processed value is emitted.
Completable preprocessor = getPreprocessor(values, recordMapProcessor);

return preprocessor.andThen(Flowable.fromIterable(() -> values.stream().iterator())
.map(value -> recordMapProcessor.process(Map.of(this.getName(), value))
.get(this.getName()).toString())
.buffer(BUFFER_SIZE));
return preprocessor.andThen(Flowable.fromIterable(() ->
values.stream().map(value -> {
// A null cell is never handed to the record processor; an empty Optional is emitted in its place.
// NOTE(review): Optional.ofNullable(null) is equivalent to Optional.empty().
if (value == null) {
return Optional.ofNullable(null);
}
// Wrap the value in a single-entry record keyed by the field name, process it,
// and read the processed value back out as a string.
return recordMapProcessor.process(Map.of(this.getName(), value)).get(this.getName()).toString();
}).iterator()).buffer(BUFFER_SIZE));
}

private Completable getPreprocessor(List<String> values, RecordMapProcessor recordMapProcessor) {
protected Completable getPreprocessor(List<String> values, RecordMapProcessor recordMapProcessor) {
// Only build a preprocessing pass when the record processor reports that it has preprocessors.
if (recordMapProcessor.hasPreprocessors()) {
return Completable.fromPublisher(Flowable.fromIterable(() -> values.stream().iterator())
.map(value -> recordMapProcessor.init(Map.of(this.getName(), value))
.get(this.getName()).toString()));
return Completable.fromPublisher(Flowable.fromIterable(() ->
values.stream().map(value -> {
// A null cell is not passed to init(); an empty Optional stands in for it in the stream.
// NOTE(review): Optional.ofNullable(null) is equivalent to Optional.empty().
if (value == null) {
return Optional.ofNullable(null);
}
// The emitted items are discarded by the Completable; only the init(...) call itself matters here.
return recordMapProcessor.init(Map.of(this.getName(), value));
}).iterator()));
} else {
// Nothing to preprocess: return an already-completed Completable.
return Completable.complete();
}
}


/**
* Creates a {@link PseudoFieldMetadata} object with the metadata about the performed pseudo operations.
*
Expand All @@ -96,13 +101,13 @@ class PseudoFieldMetadata {
private String fieldName;
private PseudoConfig pseudoRules;

/**
* Converts the {@link PseudoFieldMetadata} object to a JSON string.
*
* @return A {@link String} representing the JSON representation of the PseudoFieldMetadata
* @throws JsonProcessingException if an error occurs during JSON processing
*/
public String toJsonString() throws JsonProcessingException {
/**
* Converts the {@link PseudoFieldMetadata} object to a JSON string.
*
* @return A {@link String} representing the JSON representation of the PseudoFieldMetadata
* @throws JsonProcessingException if an error occurs during JSON processing
*/
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.writeValueAsString(this);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,32 @@
package no.ssb.dlp.pseudo.service.pseudo;

import io.micronaut.test.extensions.junit5.annotation.MicronautTest;
import io.reactivex.Completable;
import io.reactivex.Flowable;
import io.reactivex.Single;
import no.ssb.dlp.pseudo.core.field.FieldDescriptor;
import no.ssb.dlp.pseudo.core.field.FieldPseudonymizer;
import io.reactivex.observers.TestObserver;
import no.ssb.dlp.pseudo.core.map.RecordMapProcessor;
import no.ssb.dlp.pseudo.core.tink.model.EncryptedKeysetWrapper;
import org.junit.jupiter.api.Test;
import org.mockito.Mock;
import org.mockito.MockitoAnnotations;

import java.util.*;

import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.Mockito.*;

@MicronautTest
class PseudoFieldTest {

@Mock
private PseudoConfigSplitter pseudoConfigSplitter;

@Mock
private RecordMapProcessorFactory recordProcessorFactory;

@Mock
private RecordMapProcessor recordMapProcessor;

@Test
void UsesDefaultPseudoConfigWhenNoKeysetIsSupplied() {
PseudoField pseudoField = new PseudoField(null, null, null);
Expand All @@ -38,4 +51,56 @@ void setCustomKeysetWhenKeysetIsSupplied() {
assertEquals(encryptedKeysetWrapper,
pseudoField.getPseudoConfig().getKeysets().get(0));
}

/**
 * Initialises the {@code @Mock} fields of this test and wires the collaborators together:
 * the splitter yields a single empty {@link PseudoConfig}, and the factory hands out the
 * mocked {@link RecordMapProcessor} for any list of configs.
 */
void setUpProcessorMocks() {
    MockitoAnnotations.openMocks(this);

    // Whatever config is passed in, the splitter answers with exactly one default config.
    List<PseudoConfig> singleConfig = Collections.singletonList(new PseudoConfig());
    when(pseudoConfigSplitter.splitIfNecessary(any()))
            .thenReturn(singleConfig);

    // The factory always returns the shared mock so tests can stub and verify it.
    when(recordProcessorFactory.newPseudonymizeRecordProcessor(any()))
            .thenReturn(recordMapProcessor);
}

@Test
void processWithNullValues() {
    setUpProcessorMocks();

    final String fieldName = "testField";

    // No preprocessing here; that path is exercised by #preprocessorWithNullValues.
    when(recordMapProcessor.hasPreprocessors()).thenReturn(false);

    // Echo each incoming value back with a recognisable prefix.
    when(recordMapProcessor.process(any())).thenAnswer(call -> {
        Map<String, String> inputRecord = call.getArgument(0);
        return Collections.singletonMap(fieldName, "processedValue " + inputRecord.get(fieldName));
    });

    List<String> inputs = Arrays.asList("v1", null, "v2");
    PseudoField field = new PseudoField(fieldName, null, null);

    List<List<Object>> batches = field
            .process(pseudoConfigSplitter, recordProcessorFactory, inputs)
            .toList()
            .blockingGet();

    // The null input must surface as an empty Optional, in its original position.
    List<Object> expectedBatch = List.of("processedValue v1", Optional.empty(), "processedValue v2");
    assertEquals(List.of(expectedBatch), batches);
    assertEquals(3, batches.get(0).size());

    // Only the two non-null values may reach the record processor.
    verify(recordMapProcessor, times(2)).process(any());
}

@Test
void preprocessorWithNullValues() {
    setUpProcessorMocks();

    // Force the preprocessing branch and give init(...) a fixed answer.
    when(recordMapProcessor.hasPreprocessors()).thenReturn(true);
    when(recordMapProcessor.init(any()))
            .thenReturn(Collections.singletonMap("testField", "initializedValue"));

    List<String> inputs = Arrays.asList("v1", null, "v2");
    PseudoField field = new PseudoField("testField", null, null);

    // Subscribe to the preprocessor and check that it terminates cleanly.
    TestObserver<Void> observer = field.getPreprocessor(inputs, recordMapProcessor).test();
    observer.assertComplete();
    observer.assertNoErrors();

    // init(...) must have been invoked for the two non-null values only.
    verify(recordMapProcessor, times(2)).init(any());
}

}
1 change: 1 addition & 0 deletions src/test/resources/data/null_fnr.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"fnr":{"0":"11854898347","1":"16910599481","2":null},"fornavn":{"0":"Donald","1":"Mikke","2":"Anton"},"etternavn":{"0":"Duck","1":"Mus","2":"Duck"},"kjonn":{"0":"M","1":"M","2":"M"},"fodselsdato":{"0":20995,"1":60970,"2":180999}}

0 comments on commit 01afcc7

Please sign in to comment.