Skip to content

Commit

Permalink
Remove tokens and access keys from test data and docstrings
Browse files: browse the repository at this point in the history
  • Loading branch information
Raj725 committed Sep 13, 2024
1 parent ad4e67b commit 91da2ad
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 115 deletions.
10 changes: 4 additions & 6 deletions pebblo/entity_classifier/entity_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,14 +199,12 @@ def presidio_entity_classifier_and_anonymizer(
Example:
input_text = " My SSN is 222-85-4836.
ITIN number 993-77 0690
And AWS Access Key is: AKIAQIPT4PDORIRTV6PH."
ITIN number 993-77 0690"
response:
entities = {'aws-access-key': 1, 'us-itin': 1, 'us-ssn': 1}
total_count = 3
entities = {'us-itin': 1, 'us-ssn': 1}
total_count = 2
anonymized_text = "My SSN is <US_SSN>.
ITIN number <US_ITIN>
And AWS Access Key is: <AWS_ACCESS_KEY>."
ITIN number <US_ITIN>"
"""
entities = {}
total_count = 0
Expand Down
3 changes: 0 additions & 3 deletions tests/entity_classifier/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
Sachin's SSN is 222-85-4836.
ITIN number 993-77 0690
His AWS Access Key is: AKIAQIPT4PDORIRTV6PH.
And Github Token is: ghp_hgu657yiujgwfrtigu3ver238765tyuhygvtrder6t7gyvhbuy5e676578976tyghy76578uygfyfgcyturtdf
"""

input_text2 = """
Expand All @@ -19,11 +18,9 @@
Azure client secret : c4cb6f91-15a7-4e6d-a824-abcdef012345.
AWS Access Key is: AKIAQIPT4PDORIRTV6PH
AWS Secret Key is : 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
Github Token is: ghp_hgu657yiujgwfrtigu3ver238765tyuhygvtrder6t7gyvhbuy5e676578976tyghy76578uygfyfgcyturtdf
Google API key: zaCELgL0imfnc8mVLWwsAawjYr4Rx-Af50DDqtlx
Slack Token is: xoxp-7676545380258-uygh
Azure Client Secret - c4cb6f91-15a7-4e6d-a824-abcdef012345
Slack Token - xoxb-3204014939555-4519358291237-TTIf0243T8YFSAGEVr1wBrWE
Google API key- KLzaSyB_tWrbmfWx8g2bzL7Vhq7znuTUn0JPKmY"
My IP Address - 10.55.60.61
"""
Expand Down
110 changes: 4 additions & 106 deletions tests/entity_classifier/test_entity_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ def mocked_entity_classifier_response(mocker):

anonymize_response1: Tuple[list, str] = (
[
TestAnonymizerResult("GITHUB_TOKEN"),
TestAnonymizerResult("AWS_ACCESS_KEY"),
TestAnonymizerResult("US_ITIN"),
TestAnonymizerResult("US_SSN"),
Expand All @@ -53,9 +52,6 @@ def mocked_entity_classifier_response(mocker):
)
anonymize_response2: Tuple[list, str] = (
[
TestAnonymizerResult("SLACK_TOKEN"),
TestAnonymizerResult("SLACK_TOKEN"),
TestAnonymizerResult("GITHUB_TOKEN"),
TestAnonymizerResult("AWS_SECRET_KEY"),
TestAnonymizerResult("AWS_ACCESS_KEY"),
TestAnonymizerResult("US_ITIN"),
Expand Down Expand Up @@ -87,11 +83,6 @@ def mocked_entity_classifier_response(mocker):
"location": "77_97",
"confidence_score": 0.8,
},
{
"entity_type": "GITHUB_TOKEN",
"location": "120_210",
"confidence_score": 0.8,
},
]
analyzed_entities_response2: List[dict] = [
{"entity_type": "US_SSN", "location": "17_25", "confidence_score": 0.85},
Expand All @@ -101,11 +92,6 @@ def mocked_entity_classifier_response(mocker):
"location": "72_88",
"confidence_score": 0.8,
},
{
"entity_type": "GITHUB_TOKEN",
"location": "111_125",
"confidence_score": 0.8,
},
]
analyzed_entities_response3: List[dict] = [
{
Expand All @@ -130,21 +116,6 @@ def mocked_entity_classifier_response(mocker):
"location": "1587_1628",
"confidence_score": 0.8,
},
{
"entity_type": "GITHUB_TOKEN",
"location": "1646_1736",
"confidence_score": 0.8,
},
{
"entity_type": "SLACK_TOKEN",
"location": "1812_1835",
"confidence_score": 0.8,
},
{
"entity_type": "SLACK_TOKEN",
"location": "1911_1968",
"confidence_score": 0.8,
},
{"entity_type": "IP_ADDRESS", "location": "1339_1355", "confidence_score": 0.8},
]
analyzed_entities_response4: List[dict] = [
Expand All @@ -170,21 +141,6 @@ def mocked_entity_classifier_response(mocker):
"location": "1559_1575",
"confidence_score": 0.8,
},
{
"entity_type": "GITHUB_TOKEN",
"location": "1593_1607",
"confidence_score": 0.8,
},
{
"entity_type": "SLACK_TOKEN",
"location": "1683_1696",
"confidence_score": 0.8,
},
{
"entity_type": "SLACK_TOKEN",
"location": "1772_1785",
"confidence_score": 0.8,
},
{"entity_type": "IP_ADDRESS", "location": "1339_1355", "confidence_score": 0.8},
]
analyzed_entities_negative_response1: List = []
Expand Down Expand Up @@ -230,12 +186,11 @@ def test_presidio_entity_classifier_and_anonymizer(
entity_details,
) = entity_classifier.presidio_entity_classifier_and_anonymizer(input_text1)
assert entities == {
"github-token": 1,
"aws-access-key": 1,
"us-itin": 1,
"us-ssn": 1,
}
assert total_count == 4
assert total_count == 3
assert anonymized_text == input_text1
assert entity_details == {
"us-ssn": [
Expand All @@ -259,13 +214,6 @@ def test_presidio_entity_classifier_and_anonymizer(
"entity_group": "secrets_and_tokens",
}
],
"github-token": [
{
"location": "120_210",
"confidence_score": "HIGH",
"entity_group": "secrets_and_tokens",
}
],
}

(
Expand All @@ -275,12 +223,11 @@ def test_presidio_entity_classifier_and_anonymizer(
entity_details,
) = entity_classifier.presidio_entity_classifier_and_anonymizer(input_text1, True)
assert entities == {
"github-token": 1,
"aws-access-key": 1,
"us-itin": 1,
"us-ssn": 1,
}
assert total_count == 4
assert total_count == 3
assert anonymized_text == mock_input_text1_anonymize_snippet_true
assert entity_details == {
"us-ssn": [
Expand All @@ -304,13 +251,6 @@ def test_presidio_entity_classifier_and_anonymizer(
"entity_group": "secrets_and_tokens",
}
],
"github-token": [
{
"location": "111_125",
"confidence_score": "HIGH",
"entity_group": "secrets_and_tokens",
}
],
}

(
Expand All @@ -320,8 +260,6 @@ def test_presidio_entity_classifier_and_anonymizer(
entity_details,
) = entity_classifier.presidio_entity_classifier_and_anonymizer(input_text2)
assert entities == {
"slack-token": 2,
"github-token": 1,
"aws-access-key": 1,
"aws-secret-key": 1,
"us-itin": 1,
Expand All @@ -330,7 +268,7 @@ def test_presidio_entity_classifier_and_anonymizer(
"us-ssn": 1,
"ip-address": 1,
}
assert total_count == 10
assert total_count == 7
assert anonymized_text == input_text2
assert entity_details == {
"credit-card-number": [
Expand Down Expand Up @@ -375,25 +313,6 @@ def test_presidio_entity_classifier_and_anonymizer(
"entity_group": "secrets_and_tokens",
}
],
"github-token": [
{
"location": "1646_1736",
"confidence_score": "HIGH",
"entity_group": "secrets_and_tokens",
}
],
"slack-token": [
{
"location": "1812_1835",
"confidence_score": "HIGH",
"entity_group": "secrets_and_tokens",
},
{
"location": "1911_1968",
"confidence_score": "HIGH",
"entity_group": "secrets_and_tokens",
},
],
"ip-address": [
{
"location": "1339_1355",
Expand All @@ -412,8 +331,6 @@ def test_presidio_entity_classifier_and_anonymizer(
input_text2, anonymize_snippets=True
)
assert entities == {
"slack-token": 2,
"github-token": 1,
"aws-access-key": 1,
"aws-secret-key": 1,
"us-itin": 1,
Expand All @@ -422,7 +339,7 @@ def test_presidio_entity_classifier_and_anonymizer(
"us-ssn": 1,
"ip-address": 1,
}
assert total_count == 10
assert total_count == 7
assert anonymized_text == mock_input_text2_anonymize_snippet_true
assert entity_details == {
"credit-card-number": [
Expand Down Expand Up @@ -467,25 +384,6 @@ def test_presidio_entity_classifier_and_anonymizer(
"entity_group": "secrets_and_tokens",
}
],
"github-token": [
{
"location": "1593_1607",
"confidence_score": "HIGH",
"entity_group": "secrets_and_tokens",
}
],
"slack-token": [
{
"location": "1683_1696",
"confidence_score": "HIGH",
"entity_group": "secrets_and_tokens",
},
{
"location": "1772_1785",
"confidence_score": "HIGH",
"entity_group": "secrets_and_tokens",
},
],
"ip-address": [
{
"location": "1339_1355",
Expand Down

0 comments on commit 91da2ad

Please sign in to comment.