Skip to content

Commit

Permalink
adds allowed phrases to transform function
Browse files Browse the repository at this point in the history
  • Loading branch information
nickzelei committed Oct 23, 2024
1 parent ec45be0 commit f0fbbc7
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 1 deletion.
27 changes: 26 additions & 1 deletion internal/ee/transformers/functions/functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (

mgmtv1alpha1 "github.com/nucleuscloud/neosync/backend/gen/go/protos/mgmt/v1alpha1"
presidioapi "github.com/nucleuscloud/neosync/internal/ee/presidio"
transformer_utils "github.com/nucleuscloud/neosync/worker/pkg/benthos/transformers/utils"
)

var (
Expand Down Expand Up @@ -40,6 +41,8 @@ func TransformPiiText(
return "", fmt.Errorf("received non-200 response from analyzer: %s %d %s", analyzeResp.Status(), analyzeResp.StatusCode(), string(analyzeResp.Body))
}

analysisResults := removeAllowedPhrases(*analyzeResp.JSON200, value, config.GetAllowedPhrases())

defaultAnon, ok, err := getDefaultAnonymizer(config.GetDefaultAnonymizer())
if err != nil {
return "", fmt.Errorf("unable to build default anonymizer: %w", err)
Expand All @@ -49,7 +52,7 @@ func TransformPiiText(
anonymizers["DEFAULT"] = *defaultAnon
}
anonResp, err := anonymizeClient.PostAnonymizeWithResponse(ctx, presidioapi.AnonymizeRequest{
AnalyzerResults: presidioapi.ToAnonymizeRecognizerResults(*analyzeResp.JSON200),
AnalyzerResults: presidioapi.ToAnonymizeRecognizerResults(analysisResults),
Text: value,
Anonymizers: &anonymizers,
})
Expand All @@ -63,6 +66,28 @@ func TransformPiiText(
return *anonResp.JSON200.Text, nil
}

// removeAllowedPhrases returns the analyzer results that should still be
// anonymized: every result whose matched span of text is one of allowedPhrases
// is dropped, everything else is kept in its original order.
//
// The matched span is text[result.Start:result.End] and is looked up as a whole
// in the set built from allowedPhrases, so a phrase that only covers part of a
// result does not remove that result.
//
// Results whose range cannot be sliced out of text (negative start, end beyond
// the end of text, or start after end) are dropped as well instead of causing a
// slice-bounds panic.
//
// The returned slice is never nil.
func removeAllowedPhrases(
	results []presidioapi.RecognizerResultWithAnaysisExplanation,
	text string,
	allowedPhrases []string,
) []presidioapi.RecognizerResultWithAnaysisExplanation {
	// At most len(results) entries survive, so size the output once up front.
	output := make([]presidioapi.RecognizerResultWithAnaysisExplanation, 0, len(results))
	uniquePhrases := transformer_utils.ToSet(allowedPhrases)
	textLen := len(text)
	for _, result := range results {
		// An inverted range (Start > End) stays within [0, textLen] yet would
		// still panic in the slice expression below, so reject it here too.
		if result.Start < 0 || result.End > textLen || result.Start > result.End {
			continue // Skip invalid ranges
		}

		// NOTE(review): Start/End are applied as byte offsets into text. If the
		// analyzer reports character (code point) offsets, non-ASCII input will
		// yield a different span than the analyzer matched - confirm.
		phrase := text[result.Start:result.End]
		if _, ok := uniquePhrases[phrase]; !ok {
			output = append(output, result)
		}
	}

	return output
}

func buildAdhocRecognizers(dtos []*mgmtv1alpha1.PiiDenyRecognizer) []presidioapi.PatternRecognizer {
output := []presidioapi.PatternRecognizer{}
for _, dto := range dtos {
Expand Down
54 changes: 54 additions & 0 deletions internal/ee/transformers/functions/functions_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,60 @@ func Test_TransformPiiText(t *testing.T) {
})
}

func Test_removeAllowedPhrases(t *testing.T) {
t.Run("exact", func(t *testing.T) {
actual := removeAllowedPhrases(
[]presidioapi.RecognizerResultWithAnaysisExplanation{
{
Start: 11,
End: 24,
Score: 0.85,
EntityType: "person",
RecognitionMetadata: &presidioapi.RecognizedMetadata{},
},
},
"My name is Inigo Montoya prepare to die",
[]string{"Inigo Montoya"},
)
require.Empty(t, actual)
})

t.Run("invalid_range_skip", func(t *testing.T) {
actual := removeAllowedPhrases(
[]presidioapi.RecognizerResultWithAnaysisExplanation{
{
Start: 500,
End: 600,
Score: 0.85,
EntityType: "person",
RecognitionMetadata: &presidioapi.RecognizedMetadata{},
},
},
"My name is Inigo Montoya prepare to die",
[]string{"Inigo Montoya"},
)
require.Empty(t, actual)
})

t.Run("not_found", func(t *testing.T) {
input := []presidioapi.RecognizerResultWithAnaysisExplanation{
{
Start: 11,
End: 24,
Score: 0.85,
EntityType: "person",
RecognitionMetadata: &presidioapi.RecognizedMetadata{},
},
}
actual := removeAllowedPhrases(
input,
"My name is Inigo Montoya prepare to die",
[]string{"Inigo"},
)
require.Equal(t, input, actual)
})
}

// func Test_TransformBloblPiiText(t *testing.T) {
// env := bloblang.NewEmptyEnvironment()
// mockanalyze := presidioapi.NewMockAnalyzeInterface(t)
Expand Down

0 comments on commit f0fbbc7

Please sign in to comment.