From 653112f291ba2030a80defd671e428142b3fb027 Mon Sep 17 00:00:00 2001 From: 0x4f53 <71916237+0x4f53@users.noreply.github.com> Date: Mon, 13 Nov 2023 01:06:39 +0300 Subject: [PATCH] replaced locationtagger with geotext --- CONTRIBUTING.md | 2 ++ LICENSE | 2 +- README.md | 4 +++- file_utils.py | 2 +- image_utils.py | 2 +- octopii.py | 4 ++-- requirements.txt | 5 +++-- text_utils.py | 15 ++++----------- webhook.py | 41 +++++++++++++++++++++++++++++++++++++++++ 9 files changed, 58 insertions(+), 19 deletions(-) create mode 100644 webhook.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6c0fa33..27ebb99 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -36,3 +36,5 @@ Keep the following rules in mind when writing your own definitions: ### 4. Pull request Submit a pull request and we'll pick it up and merge it if the changes look good. + +For any queries, feel free to contact the developers and maintainers of this project \ No newline at end of file diff --git a/LICENSE b/LICENSE index 9afb18c..384ff1f 100644 --- a/LICENSE +++ b/LICENSE @@ -2,7 +2,7 @@ MIT License Copyright (c) 2023 Owais Shaikh Research @ RedHunt Labs Pvt Ltd -Email: owais.shaikh@redhuntlabs.com | 0x4f@tuta.io +Email: owais.shaikh@redhuntlabs.com | me@0x4f.in Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index b286d6f..a7eaed2 100644 --- a/README.md +++ b/README.md @@ -163,4 +163,6 @@ This tool is intended for research and educational purposes only. RedHunt Labs a Copyright © 2023 RedHunt Labs Private Limited. 
-By Owais Shaikh (owais.shaikh@redhuntlabs.com | 0x4f@tuta.io) +By Owais Shaikh +- Work: owais.shaikh@redhuntlabs.com +- Personal: me@0x4f.in diff --git a/file_utils.py b/file_utils.py index 737eab4..4aea8a3 100644 --- a/file_utils.py +++ b/file_utils.py @@ -3,7 +3,7 @@ Copyright (c) Research @ RedHunt Labs Pvt Ltd Written by Owais Shaikh -Email: owais.shaikh@redhuntlabs.com | 0x4f@tuta.io +Email: owais.shaikh@redhuntlabs.com | me@0x4f.in Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/image_utils.py b/image_utils.py index f7b2520..aa5f314 100644 --- a/image_utils.py +++ b/image_utils.py @@ -3,7 +3,7 @@ Copyright (c) Research @ RedHunt Labs Pvt Ltd Written by Owais Shaikh -Email: owais.shaikh@redhuntlabs.com | 0x4f@tuta.io +Email: owais.shaikh@redhuntlabs.com | me@0x4f.in Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/octopii.py b/octopii.py index 16403d0..af60499 100644 --- a/octopii.py +++ b/octopii.py @@ -3,7 +3,7 @@ Copyright (c) Research @ RedHunt Labs Pvt Ltd Written by Owais Shaikh -Email: owais.shaikh@redhuntlabs.com | 0x4f@tuta.io +Email: owais.shaikh@redhuntlabs.com | me@0x4f.in Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -42,7 +42,7 @@ def print_logo(): ⠀⠀⠀⠀⠀⡳⠀⠧⣤⡳⣝⢤⠼⠀⡯⠀⠀⠈⠀ A PII scanner ⠀⠀⠀⠀⢀⣈⣋⣋⠮⡻⡪⢯⣋⢓⣉⡀ ______________ ⠀⠀⠀⢀⣳⡁⡡⣅⠀⡗⣝⠀⡨⣅⢁⣗⠀⠀ (c) 2023 RedHunt Labs Pvt Ltd -⠀⠀⠀⠀⠈⠀⠸⣊⣀⡝⢸⣀⣸⠊⠀⠉⠀⠀⠀⠀by Owais Shaikh +⠀⠀⠀⠀⠈⠀⠸⣊⣀⡝⢸⣀⣸⠊⠀⠉⠀⠀⠀⠀by Owais Shaikh (owais.shaikh@redhuntlabs.com | me@0x4f.in) ⠀⠀⠀⠀⠀⠀⠀⠈⠈⠀⠀⠈⠈''' print (logo) diff --git a/requirements.txt b/requirements.txt index a1d3ca2..5b08efc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,6 @@ deskew tensorflow textract pdf2image -locationtagger 
keras matplotlib pytesseract # along with: sudo apt install tesseract-ocr -y
@@ -15,4 +14,6 @@ pillow
 imutils
 xmltodict
 nltk
-bs4
\ No newline at end of file
+bs4
+requests
+geotext
diff --git a/text_utils.py b/text_utils.py
index 501fc1f..2fe0b33 100644
--- a/text_utils.py
+++ b/text_utils.py
@@ -3,7 +3,7 @@
 
 Copyright (c) Research @ RedHunt Labs Pvt Ltd
 Written by Owais Shaikh
-Email: owais.shaikh@redhuntlabs.com | 0x4f@tuta.io
+Email: owais.shaikh@redhuntlabs.com | me@0x4f.in
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -101,17 +101,10 @@ def read_pdf(pdf):
 
 # python -m spacy download en_core_web_sm
 def regional_pii(text):
-    import locationtagger
-    try:
-        place_entity = locationtagger.find_locations(text = text)
-    except LookupError:
-        nltk.downloader.download('punkt')
-        nltk.download('averaged_perceptron_tagger')
-        nltk.download('maxent_ne_chunker')
-        nltk.download('words')
-        place_entity = locationtagger.find_locations(text = text)
+    from geotext import GeoText
+    place_entity = GeoText(text)
 
-    final_output = place_entity.address_strings + place_entity.regions + place_entity.countries
+    final_output = list(set(place_entity.cities + place_entity.countries))  # set() drops duplicate names; order is unspecified
     return final_output
 
 def keywords_classify_pii(rules, intelligible_text_list):
diff --git a/webhook.py b/webhook.py
new file mode 100644
index 0000000..d60f87e
--- /dev/null
+++ b/webhook.py
@@ -0,0 +1,41 @@
+"""
+MIT License
+
+Copyright (c) Research @ RedHunt Labs Pvt Ltd
+Written by Owais Shaikh
+Email: owais.shaikh@redhuntlabs.com | me@0x4f.in
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to
whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+import requests
+
+def push_data(data: str, url: str):
+    """POST `data` to the webhook `url` as the JSON body {"text": data} and print the outcome."""
+    try:
+        # url example: https://hooks.slack.com/services/<>/<>/<>
+        # json= escapes the payload and sets the application/json Content-Type header
+        req = requests.post(url, json={'text': data}, timeout=7)
+    except requests.RequestException as error:
+        print('Couldn\'t send scan results to webhook. Reason: ' + str(error))
+        return
+    if req.status_code == 200:
+        print('Scan results sent to webhook.')
+    else:
+        print('Couldn\'t send scan results to webhook. Reason: ' + req.text)