forked from nayanmapara/PhishBuster
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathphishbuster.py
56 lines (50 loc) · 2.92 KB
/
phishbuster.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from urllib.parse import urlparse
import re
import tldextract
import requests
def url_syntax(url_changes):
    """Return ``url_changes`` with ``http://`` prepended when it has no HTTP scheme.

    A URL counts as already having a scheme only when it *starts with*
    ``http://`` or ``https://`` (compared case-insensitively); such input is
    returned unchanged. Anything else gets ``http://`` put in front of it.

    Args:
        url_changes: URL or bare host/path string supplied by the caller.

    Returns:
        The URL string, guaranteed to begin with an HTTP(S) scheme.
    """
    # Anchor the check at the start of the string: looking for "http" anywhere
    # would wrongly treat inputs such as "example.com/http-guide" or
    # "httpbin.org" as if they already carried a scheme.
    if re.match(r"https?://", url_changes, re.IGNORECASE):
        return url_changes
    return "http://" + url_changes
def subdomain_re(domain_url):
    """Drop any subdomain from ``domain_url`` and return ``domain.suffix``.

    The input is parsed with ``tldextract``. When the parsed result has no
    suffix (for example a bare host name or an IP address) only the domain
    part is returned, without a trailing dot. When nothing could be parsed at
    all, the input is returned unchanged.

    Args:
        domain_url: Host name or URL to reduce.

    Returns:
        The domain joined with its suffix, e.g. ``"google.com"`` for
        ``"www.google.com"``.
    """
    # Parse once and reuse the result instead of re-extracting per component.
    parts = tldextract.extract(domain_url)
    # Missing components come back as empty strings, so keep only the
    # non-empty ones; this avoids results like "localhost." or ".com".
    pieces = [piece for piece in (parts.domain, parts.suffix) if piece]
    if not pieces:
        return domain_url
    return ".".join(pieces)
def unshorten_url(url, timeout=10):
    """Follow redirects from ``url`` and return the final URL.

    A HEAD request is sent with redirects enabled. This is best-effort: if the
    request fails (site offline, DNS failure, timeout, malformed URL, ...),
    the input ``url`` is returned unchanged.

    Args:
        url: URL to resolve; expected to include a scheme.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests``.

    Returns:
        The URL reached after following redirects, or ``url`` on failure.
    """
    try:
        # The context manager closes the session (and its pooled connections)
        # even when the request raises.
        with requests.Session() as session:
            response = session.head(url, allow_redirects=True, timeout=timeout)
            return response.url
    except (requests.RequestException, ValueError):
        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit are
        # no longer swallowed; ValueError covers malformed-URL parsing errors.
        return url
def phishbuster_url(url_input):
    """Return the real host part of ``url_input`` with disguises removed.

    Steps:
      1. ensure the URL has a scheme (``url_syntax``),
      2. follow redirects / unshorten it (``unshorten_url``),
      3. keep only the network location (scheme, path and query are dropped),
      4. discard anything before the last ``@`` in the network location, which
         is how ``trusted.com~@evil.com``-style URLs hide their real host.

    Args:
        url_input: URL to analyse, with or without a scheme.

    Returns:
        The host portion, possibly still including subdomains, e.g.
        ``"google.com"`` or ``"www.google.com"``.
    """
    corrected_url = url_syntax(url_input)
    resolved_url = unshorten_url(corrected_url)
    netloc = urlparse(resolved_url).netloc
    # Inspect only the netloc: the previous code searched the whole URL for
    # "~@" but split the netloc, raising IndexError when the marker appeared
    # in the path or query. Taking the text after the LAST "@" also handles
    # several markers and the plain "user@host" disguise; when no "@" is
    # present, rpartition returns the netloc unchanged.
    return netloc.rpartition("@")[2]
def comparing_url(url_phish, url_org, country="com"):
    """Tell whether ``url_phish`` points somewhere other than ``url_org``.

    The suspect URL is reduced to ``domain.suffix`` (scheme, path, disguise
    characters and subdomains removed) and compared, case-insensitively,
    against:
      * ``url_org`` as given,
      * ``url_org`` reduced to ``domain.suffix``,
      * the regional variant ``<domain of url_org>.<country>``.

    Args:
        url_phish: URL under suspicion.
        url_org: Domain of the genuine site, e.g. ``"google.com"``.
        country: Suffix used to build the regional variant (default ``"com"``).

    Returns:
        ``False`` when the suspect URL matches one of the accepted forms
        (not phishing), ``True`` otherwise (phishing).
    """
    input_url = phishbuster_url(url_phish)
    # Host names are case-insensitive, so compare everything in lower case.
    final_url = subdomain_re(input_url).lower()

    # Normalise the reference as well, so that "www.google.com" or
    # "https://Google.com" given as url_org still matches the genuine site.
    org_parts = tldextract.extract(url_org)
    org_registered = ".".join(
        piece for piece in (org_parts.domain, org_parts.suffix) if piece
    )
    regional = org_parts.domain + "." + country.lower()

    legitimate = {url_org.lower(), org_registered.lower(), regional.lower()}
    # An empty reference must never make an empty suspect host look genuine.
    legitimate.discard("")
    return final_url not in legitimate
if __name__ == "__main__":
    # Example invocation with placeholder arguments: suspect URL, genuine
    # domain, and country suffix. Prints True for phishing, False otherwise.
    is_phishing = comparing_url('input.url', 'orginal_domain.name', 'country')
    print(is_phishing)