From 1213013ac230e09b728581d7e00174e31f44fbb8 Mon Sep 17 00:00:00 2001 From: OthmanEmpire Date: Tue, 21 Nov 2023 11:35:58 +0300 Subject: [PATCH 1/2] added regex for saudi arabian passports --- definitions.json | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/definitions.json b/definitions.json index 2633140..6728686 100644 --- a/definitions.json +++ b/definitions.json @@ -141,6 +141,16 @@ "<<<<" ] }, + "Saudi Arabian Passport": { + "regex":"(?:P Date: Tue, 21 Nov 2023 12:44:09 +0300 Subject: [PATCH 2/2] Fixed bug where definitions.json would not open properly on Windows --- text_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/text_utils.py b/text_utils.py index 6d27239..9d1fe42 100644 --- a/text_utils.py +++ b/text_utils.py @@ -39,7 +39,7 @@ def string_tokenizer(text): def similarity(a, b): return difflib.SequenceMatcher(None, a, b).ratio() * 100 def get_regexes(): - with open('definitions.json') as json_file: + with open('definitions.json', "r", encoding='utf-8') as json_file: _rules = json.load(json_file) return _rules @@ -133,4 +133,4 @@ def keywords_classify_pii(rules, intelligible_text_list): ) > 80: scores[key] += 1 return scores - \ No newline at end of file +