From ae7d3cc46c43604de09f77ccc0be5b6aa1fc8d62 Mon Sep 17 00:00:00 2001 From: Sander Date: Mon, 6 Jun 2016 09:57:09 +0200 Subject: [PATCH 01/40] ADD: Check if a referral WHOIS server is actually alive ADD: Check to ignore www. WHOIS servers (none start with www.) ADD: Fallback regexes, such as 'Admin .... Name', where the dots can be anything. They apply to a single field instead of a whole block. ADD: Every contacts empty fields are checked against the fall_back regexes, to see if a, for example, registrant name can still be found even if the pattern is not known. REF: 'facsimile' is now extracted as 'fax', since they mean the same thing (Fax is short for facsimile) FIX: Tabs to spaces (PEP-8) --- pythonwhois/net.py | 225 ++-- pythonwhois/parse.py | 2625 +++++++++++++++++++++++------------------- 2 files changed, 1581 insertions(+), 1269 deletions(-) diff --git a/pythonwhois/net.py b/pythonwhois/net.py index 7d71b87..4d7a2d0 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -1,111 +1,132 @@ -import socket, re, sys +import os +import re +import socket +import subprocess +import sys from codecs import encode, decode + from . 
import shared -def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=False, with_server_list=False, server_list=None): - previous = previous or [] - server_list = server_list or [] - # Sometimes IANA simply won't give us the right root WHOIS server - exceptions = { - ".ac.uk": "whois.ja.net", - ".ps": "whois.pnina.ps", - ".buzz": "whois.nic.buzz", - ".moe": "whois.nic.moe", - ".arpa": "whois.iana.org", - ".bid": "whois.nic.bid", - ".int": "whois.iana.org", - ".kred": "whois.nic.kred", - ".nagoya": "whois.gmoregistry.net", - ".nyc": "whois.nic.nyc", - ".okinawa": "whois.gmoregistry.net", - ".qpon": "whois.nic.qpon", - ".sohu": "whois.gtld.knet.cn", - ".tokyo": "whois.nic.tokyo", - ".trade": "whois.nic.trade", - ".webcam": "whois.nic.webcam", - ".xn--rhqv96g": "whois.nic.xn--rhqv96g", - # The following is a bit hacky, but IANA won't return the right answer for example.com because it's a direct registration. - "example.com": "whois.verisign-grs.com" - } - if rfc3490: - if sys.version_info < (3, 0): - domain = encode( domain if type(domain) is unicode else decode(domain, "utf8"), "idna" ) - else: - domain = encode(domain, "idna").decode("ascii") +def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=False, with_server_list=False, + server_list=None): + previous = previous or [] + server_list = server_list or [] + # Sometimes IANA simply won't give us the right root WHOIS server + exceptions = { + ".ac.uk": "whois.ja.net", + ".ps": "whois.pnina.ps", + ".buzz": "whois.nic.buzz", + ".moe": "whois.nic.moe", + ".arpa": "whois.iana.org", + ".bid": "whois.nic.bid", + ".int": "whois.iana.org", + ".kred": "whois.nic.kred", + ".nagoya": "whois.gmoregistry.net", + ".nyc": "whois.nic.nyc", + ".okinawa": "whois.gmoregistry.net", + ".qpon": "whois.nic.qpon", + ".sohu": "whois.gtld.knet.cn", + ".tokyo": "whois.nic.tokyo", + ".trade": "whois.nic.trade", + ".webcam": "whois.nic.webcam", + ".xn--rhqv96g": "whois.nic.xn--rhqv96g", + # The 
following is a bit hacky, but IANA won't return the right answer for example.com because it's a direct registration. + "example.com": "whois.verisign-grs.com" + } + + if rfc3490: + if sys.version_info < (3, 0): + domain = encode(domain if type(domain) is unicode else decode(domain, "utf8"), "idna") + else: + domain = encode(domain, "idna").decode("ascii") + + if len(previous) == 0 and server == "": + # Root query + is_exception = False + for exception, exc_serv in exceptions.items(): + if domain.endswith(exception): + is_exception = True + target_server = exc_serv + break + if is_exception == False: + target_server = get_root_server(domain) + else: + target_server = server + if target_server == "whois.jprs.jp": + request_domain = "%s/e" % domain # Suppress Japanese output + elif domain.endswith(".de") and (target_server == "whois.denic.de" or target_server == "de.whois-servers.net"): + request_domain = "-T dn,ace %s" % domain # regional specific stuff + elif target_server == "whois.verisign-grs.com": + request_domain = "=%s" % domain # Avoid partial matches + else: + request_domain = domain + response = whois_request(request_domain, target_server) + if never_cut: + # If the caller has requested to 'never cut' responses, he will get the original response from the server (this is + # useful for callers that are only interested in the raw data). Otherwise, if the target is verisign-grs, we will + # select the data relevant to the requested domain, and discard the rest, so that in a multiple-option response the + # parsing code will only touch the information relevant to the requested domain. The side-effect of this is that + # when `never_cut` is set to False, any verisign-grs responses in the raw data will be missing header, footer, and + # alternative domain options (this is handled a few lines below, after the verisign-grs processing). + new_list = [response] + previous + if target_server == "whois.verisign-grs.com": + # VeriSign is a little... special. 
As it may return multiple full records and there's no way to do an exact query, + # we need to actually find the correct record in the list. + for record in response.split("\n\n"): + if re.search("Domain Name: %s\n" % domain.upper(), record): + response = record + break + if never_cut == False: + new_list = [response] + previous + server_list.append(target_server) + + # Ignore redirects from registries who publish the registrar data themselves + if target_server not in ('whois.nic.xyz',): + for line in [x.strip() for x in response.splitlines()]: + match = re.match("(refer|whois server|referral url|whois server|registrar whois):\s*([^\s]+\.[^\s]+)", line, + re.IGNORECASE) + if match is not None: + referal_server = match.group(2) + if referal_server != server and "://" not in referal_server \ + and "www." not in referal_server and server_is_alive(referal_server): + # We want to ignore anything non-WHOIS (eg. HTTP) for now, and servers that are not reachable + # Referal to another WHOIS server... 
+ return get_whois_raw(domain, referal_server, new_list, server_list=server_list, + with_server_list=with_server_list) + + if with_server_list: + return (new_list, server_list) + else: + return new_list + + +def server_is_alive(server): + response = subprocess.call(["ping", "-c 1", "-w2", server], stdout=open(os.devnull, "w"), + stderr=subprocess.STDOUT) + if response != 0: + return False + return True - if len(previous) == 0 and server == "": - # Root query - is_exception = False - for exception, exc_serv in exceptions.items(): - if domain.endswith(exception): - is_exception = True - target_server = exc_serv - break - if is_exception == False: - target_server = get_root_server(domain) - else: - target_server = server - if target_server == "whois.jprs.jp": - request_domain = "%s/e" % domain # Suppress Japanese output - elif domain.endswith(".de") and ( target_server == "whois.denic.de" or target_server == "de.whois-servers.net" ): - request_domain = "-T dn,ace %s" % domain # regional specific stuff - elif target_server == "whois.verisign-grs.com": - request_domain = "=%s" % domain # Avoid partial matches - else: - request_domain = domain - response = whois_request(request_domain, target_server) - if never_cut: - # If the caller has requested to 'never cut' responses, he will get the original response from the server (this is - # useful for callers that are only interested in the raw data). Otherwise, if the target is verisign-grs, we will - # select the data relevant to the requested domain, and discard the rest, so that in a multiple-option response the - # parsing code will only touch the information relevant to the requested domain. The side-effect of this is that - # when `never_cut` is set to False, any verisign-grs responses in the raw data will be missing header, footer, and - # alternative domain options (this is handled a few lines below, after the verisign-grs processing). 
- new_list = [response] + previous - if target_server == "whois.verisign-grs.com": - # VeriSign is a little... special. As it may return multiple full records and there's no way to do an exact query, - # we need to actually find the correct record in the list. - for record in response.split("\n\n"): - if re.search("Domain Name: %s\n" % domain.upper(), record): - response = record - break - if never_cut == False: - new_list = [response] + previous - server_list.append(target_server) - - # Ignore redirects from registries who publish the registrar data themselves - if target_server not in ('whois.nic.xyz',): - for line in [x.strip() for x in response.splitlines()]: - match = re.match("(refer|whois server|referral url|whois server|registrar whois):\s*([^\s]+\.[^\s]+)", line, re.IGNORECASE) - if match is not None: - referal_server = match.group(2) - if referal_server != server and "://" not in referal_server: # We want to ignore anything non-WHOIS (eg. HTTP) for now. - # Referal to another WHOIS server... 
- return get_whois_raw(domain, referal_server, new_list, server_list=server_list, with_server_list=with_server_list) - - if with_server_list: - return (new_list, server_list) - else: - return new_list def get_root_server(domain): - data = whois_request(domain, "whois.iana.org") - for line in [x.strip() for x in data.splitlines()]: - match = re.match("refer:\s*([^\s]+)", line) - if match is None: - continue - return match.group(1) - raise shared.WhoisException("No root WHOIS server found for domain.") + data = whois_request(domain, "whois.iana.org") + for line in [x.strip() for x in data.splitlines()]: + match = re.match("refer:\s*([^\s]+)", line) + if match is None: + continue + return match.group(1) + raise shared.WhoisException("No root WHOIS server found for domain.") + def whois_request(domain, server, port=43): - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.connect((server, port)) - sock.send(("%s\r\n" % domain).encode("utf-8")) - buff = b"" - while True: - data = sock.recv(1024) - if len(data) == 0: - break - buff += data - return buff.decode("utf-8", "replace") + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect((server, port)) + sock.send(("%s\r\n" % domain).encode("utf-8")) + buff = b"" + while True: + data = sock.recv(1024) + if len(data) == 0: + break + buff += data + return buff.decode("utf-8", "replace") diff --git a/pythonwhois/parse.py b/pythonwhois/parse.py index cb3286f..8341257 100644 --- a/pythonwhois/parse.py +++ b/pythonwhois/parse.py @@ -1,30 +1,40 @@ from __future__ import print_function -import re, sys, datetime, csv, pkgutil + +import copy +import csv +import datetime +import pkgutil +import re +import sys + from . 
import net, shared try: - from io import StringIO + from io import StringIO except ImportError: - from cStringIO import StringIO + from cStringIO import StringIO + def pkgdata(name): - data = pkgutil.get_data("pythonwhois", name) - if sys.version_info < (3, 0): - return data - else: - return data.decode("utf-8") + data = pkgutil.get_data("pythonwhois", name) + if sys.version_info < (3, 0): + return data + else: + return data.decode("utf-8") + def read_dataset(filename, destination, abbrev_key, name_key, is_dict=False): - try: - if is_dict: - reader = csv.DictReader(pkgdata(filename).splitlines()) - else: - reader = csv.reader(pkgdata(filename).splitlines()) + try: + if is_dict: + reader = csv.DictReader(pkgdata(filename).splitlines()) + else: + reader = csv.reader(pkgdata(filename).splitlines()) + + for line in reader: + destination[line[abbrev_key]] = line[name_key] + except IOError as e: + pass - for line in reader: - destination[line[abbrev_key]] = line[name_key] - except IOError as e: - pass common_first_names = set() airports = {} @@ -34,22 +44,22 @@ def read_dataset(filename, destination, abbrev_key, name_key, is_dict=False): states_ca = {} try: - reader = csv.DictReader(pkgdata("common_first_names.dat").splitlines()) + reader = csv.DictReader(pkgdata("common_first_names.dat").splitlines()) - for line in reader: - common_first_names.add(line["name"].lower()) + for line in reader: + common_first_names.add(line["name"].lower()) except IOError as e: - pass + pass try: - reader = csv.reader(pkgdata("airports.dat").splitlines()) + reader = csv.reader(pkgdata("airports.dat").splitlines()) - for line in reader: - airports[line[4]] = line[2] - airports[line[5]] = line[2] + for line in reader: + airports[line[4]] = line[2] + airports[line[5]] = line[2] except IOError as e: - # The distributor likely removed airports.dat for licensing reasons. We'll just leave an empty dict. - pass + # The distributor likely removed airports.dat for licensing reasons. 
We'll just leave an empty dict. + pass read_dataset("countries.dat", countries, "iso", "name", is_dict=True) read_dataset("countries3.dat", countries, "iso3", "name", is_dict=True) @@ -62,331 +72,505 @@ def read_dataset(filename, destination, abbrev_key, name_key, is_dict=False): country_names = set([name.lower() for name in countries.values()]) + def precompile_regexes(source, flags=0): - return [re.compile(regex, flags) for regex in source] + return [re.compile(regex, flags) for regex in source] + def precompile_regexes_dict(source, flags=0): - return dict((key, re.compile(regex, flags)) for (key, regex) in source.items()) + return dict((key, re.compile(regex, flags)) for (key, regex) in source.items()) + grammar = { - "_data": { - 'id': ['Domain ID:[ ]*(?P.+)'], - 'status': ['\[Status\]\s*(?P.+)', - 'Status\s*:\s?(?P.+)', - '\[State\]\s*(?P.+)', - '^state:\s*(?P.+)'], - 'creation_date': ['\[Created on\]\s*(?P.+)', - 'Created on[.]*: [a-zA-Z]+, (?P.+)', - 'Creation Date:\s?(?P.+)', - 'Creation date\s*:\s?(?P.+)', - 'Registration Date:\s?(?P.+)', - 'Created Date:\s?(?P.+)', - 'Created on:\s?(?P.+)', - 'Created on\s?[.]*:\s?(?P.+)\.', - 'Date Registered\s?[.]*:\s?(?P.+)', - 'Domain Created\s?[.]*:\s?(?P.+)', - 'Domain registered\s?[.]*:\s?(?P.+)', - 'Domain record activated\s?[.]*:\s*?(?P.+)', - 'Record created on\s?[.]*:?\s*?(?P.+)', - 'Record created\s?[.]*:?\s*?(?P.+)', - 'Created\s?[.]*:?\s*?(?P.+)', - 'Registered on\s?[.]*:?\s*?(?P.+)', - 'Registered\s?[.]*:?\s*?(?P.+)', - 'Domain Create Date\s?[.]*:?\s*?(?P.+)', - 'Domain Registration Date\s?[.]*:?\s*?(?P.+)', - 'created:\s*(?P.+)', - '\[Registered Date\]\s*(?P.+)', - 'created-date:\s*(?P.+)', - 'Domain Name Commencement Date: (?P.+)', - 'registered:\s*(?P.+)', - 'registration:\s*(?P.+)'], - 'expiration_date': ['\[Expires on\]\s*(?P.+)', - 'Registrar Registration Expiration Date:[ ]*(?P.+)-[0-9]{4}', - 'Expires on[.]*: [a-zA-Z]+, (?P.+)', - 'Expiration Date:\s?(?P.+)', - 'Expiration date\s*:\s?(?P.+)', - 
'Expires on:\s?(?P.+)', - 'Expires on\s?[.]*:\s?(?P.+)\.', - 'Exp(?:iry)? Date\s?[.]*:\s?(?P.+)', - 'Expiry\s*:\s?(?P.+)', - 'Domain Currently Expires\s?[.]*:\s?(?P.+)', - 'Record will expire on\s?[.]*:\s?(?P.+)', - 'Domain expires\s?[.]*:\s*?(?P.+)', - 'Record expires on\s?[.]*:?\s*?(?P.+)', - 'Record expires\s?[.]*:?\s*?(?P.+)', - 'Expires\s?[.]*:?\s*?(?P.+)', - 'Expire Date\s?[.]*:?\s*?(?P.+)', - 'Expired\s?[.]*:?\s*?(?P.+)', - 'Domain Expiration Date\s?[.]*:?\s*?(?P.+)', - 'paid-till:\s*(?P.+)', - 'expiration_date:\s*(?P.+)', - 'expire-date:\s*(?P.+)', - 'renewal:\s*(?P.+)', - 'expire:\s*(?P.+)'], - 'updated_date': ['\[Last Updated\]\s*(?P.+)', - 'Record modified on[.]*: (?P.+) [a-zA-Z]+', - 'Record last updated on[.]*: [a-zA-Z]+, (?P.+)', - 'Updated Date:\s?(?P.+)', - 'Updated date\s*:\s?(?P.+)', - #'Database last updated on\s?[.]*:?\s*?(?P.+)\s[a-z]+\.?', - 'Record last updated on\s?[.]*:?\s?(?P.+)\.', - 'Domain record last updated\s?[.]*:\s*?(?P.+)', - 'Domain Last Updated\s?[.]*:\s*?(?P.+)', - 'Last updated on:\s?(?P.+)', - 'Date Modified\s?[.]*:\s?(?P.+)', - 'Last Modified\s?[.]*:\s?(?P.+)', - 'Domain Last Updated Date\s?[.]*:\s?(?P.+)', - 'Record last updated\s?[.]*:\s?(?P.+)', - 'Modified\s?[.]*:\s?(?P.+)', - '(C|c)hanged:\s*(?P.+)', - 'last_update:\s*(?P.+)', - 'Last Update\s?[.]*:\s?(?P.+)', - 'Last updated on (?P.+) [a-z]{3,4}', - 'Last updated:\s*(?P.+)', - 'last-updated:\s*(?P.+)', - '\[Last Update\]\s*(?P.+) \([A-Z]+\)'], - 'registrar': ['registrar:\s*(?P.+)', - 'Registrar:\s*(?P.+)', - 'Sponsoring Registrar Organization:\s*(?P.+)', - 'Registered through:\s?(?P.+)', - 'Registrar Name[.]*:\s?(?P.+)', - 'Record maintained by:\s?(?P.+)', - 'Registration Service Provided By:\s?(?P.+)', - 'Registrar of Record:\s?(?P.+)', - 'Domain Registrar :\s?(?P.+)', - 'Registration Service Provider: (?P.+)', - '\tName:\t\s(?P.+)'], - 'whois_server': ['Whois Server:\s?(?P.+)', - 'Registrar Whois:\s?(?P.+)'], - 'nameservers': ['Name Server:[ ]*(?P[^ ]+)', - 
'Nameservers:[ ]*(?P[^ ]+)', - '(?<=[ .]{2})(?P([a-z0-9-]+\.)+[a-z0-9]+)(\s+([0-9]{1,3}\.){3}[0-9]{1,3})', - 'nameserver:\s*(?P.+)', - 'nserver:\s*(?P[^[\s]+)', - 'Name Server[.]+ (?P[^[\s]+)', - 'Hostname:\s*(?P[^\s]+)', - 'DNS[0-9]+:\s*(?P.+)', - ' DNS:\s*(?P.+)', - 'ns[0-9]+:\s*(?P.+)', - 'NS [0-9]+\s*:\s*(?P.+)', - '\[Name Server\]\s*(?P.+)', - '(?<=[ .]{2})(?P[a-z0-9-]+\.d?ns[0-9]*\.([a-z0-9-]+\.)+[a-z0-9]+)', - '(?<=[ .]{2})(?P([a-z0-9-]+\.)+[a-z0-9]+)(\s+([0-9]{1,3}\.){3}[0-9]{1,3})', - '(?<=[ .]{2})[^a-z0-9.-](?Pd?ns\.([a-z0-9-]+\.)+[a-z0-9]+)', - '^ *(?:Primary|Secondary|Third|Fourth) Server Hostname\.*: +(?P.+)$', - 'Nserver:\s*(?P.+)'], - 'emails': ['(?P[\w.-]+@[\w.-]+\.[\w]{2,6})', # Really need to fix this, much longer TLDs now exist... - '(?P[\w.-]+\sAT\s[\w.-]+\sDOT\s[\w]{2,6})'] - }, - "_dateformats": ( - '(?P[0-9]{1,2})[./ -](?PJan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[./ -](?P[0-9]{4}|[0-9]{2})' - '(\s+(?P[0-9]{1,2})[:.](?P[0-9]{1,2})[:.](?P[0-9]{1,2}))?', - '[a-z]{3}\s(?PJan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[./ -](?P[0-9]{1,2})(\s+(?P[0-9]{1,2})[:.](?P[0-9]{1,2})[:.](?P[0-9]{1,2}))?\s[a-z]{3}\s(?P[0-9]{4}|[0-9]{2})', - '[a-zA-Z]+\s(?P[0-9]{1,2})(?:st|nd|rd|th)\s(?PJan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec|January|February|March|April|May|June|July|August|September|October|November|December)\s(?P[0-9]{4})', - '(?P[0-9]{4})[./-]?(?P[0-9]{2})[./-]?(?P[0-9]{2})(\s|T|/)((?P[0-9]{1,2})[:.-](?P[0-9]{1,2})[:.-](?P[0-9]{1,2}))', - '(?P[0-9]{4})[./-](?P[0-9]{1,2})[./-](?P[0-9]{1,2})', - '(?P[0-9]{1,2})[./ -](?P[0-9]{1,2})[./ -](?P[0-9]{4}|[0-9]{2})', - '(?PJan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (?P[0-9]{1,2}),? 
(?P[0-9]{4})', - '(?P[0-9]{1,2})-(?PJanuary|February|March|April|May|June|July|August|September|October|November|December)-(?P[0-9]{4})', - ), - "_months": { - 'jan': 1, - 'january': 1, - 'feb': 2, - 'february': 2, - 'mar': 3, - 'march': 3, - 'apr': 4, - 'april': 4, - 'may': 5, - 'jun': 6, - 'june': 6, - 'jul': 7, - 'july': 7, - 'aug': 8, - 'august': 8, - 'sep': 9, - 'sept': 9, - 'september': 9, - 'oct': 10, - 'october': 10, - 'nov': 11, - 'november': 11, - 'dec': 12, - 'december': 12 - } + "_data": { + 'id': ['Domain ID:[ ]*(?P.+)'], + 'status': ['\[Status\]\s*(?P.+)', + 'Status\s*:\s?(?P.+)', + '\[State\]\s*(?P.+)', + '^state:\s*(?P.+)'], + 'creation_date': ['\[Created on\]\s*(?P.+)', + 'Created on[.]*: [a-zA-Z]+, (?P.+)', + 'Creation Date:\s?(?P.+)', + 'Creation date\s*:\s?(?P.+)', + 'Registration Date:\s?(?P.+)', + 'Created Date:\s?(?P.+)', + 'Created on:\s?(?P.+)', + 'Created on\s?[.]*:\s?(?P.+)\.', + 'Date Registered\s?[.]*:\s?(?P.+)', + 'Domain Created\s?[.]*:\s?(?P.+)', + 'Domain registered\s?[.]*:\s?(?P.+)', + 'Domain record activated\s?[.]*:\s*?(?P.+)', + 'Record created on\s?[.]*:?\s*?(?P.+)', + 'Record created\s?[.]*:?\s*?(?P.+)', + 'Created\s?[.]*:?\s*?(?P.+)', + 'Registered on\s?[.]*:?\s*?(?P.+)', + 'Registered\s?[.]*:?\s*?(?P.+)', + 'Domain Create Date\s?[.]*:?\s*?(?P.+)', + 'Domain Registration Date\s?[.]*:?\s*?(?P.+)', + 'created:\s*(?P.+)', + '\[Registered Date\]\s*(?P.+)', + 'created-date:\s*(?P.+)', + 'Domain Name Commencement Date: (?P.+)', + 'registered:\s*(?P.+)', + 'registration:\s*(?P.+)'], + 'expiration_date': ['\[Expires on\]\s*(?P.+)', + 'Registrar Registration Expiration Date:[ ]*(?P.+)-[0-9]{4}', + 'Expires on[.]*: [a-zA-Z]+, (?P.+)', + 'Expiration Date:\s?(?P.+)', + 'Expiration date\s*:\s?(?P.+)', + 'Expires on:\s?(?P.+)', + 'Expires on\s?[.]*:\s?(?P.+)\.', + 'Exp(?:iry)? 
Date\s?[.]*:\s?(?P.+)', + 'Expiry\s*:\s?(?P.+)', + 'Domain Currently Expires\s?[.]*:\s?(?P.+)', + 'Record will expire on\s?[.]*:\s?(?P.+)', + 'Domain expires\s?[.]*:\s*?(?P.+)', + 'Record expires on\s?[.]*:?\s*?(?P.+)', + 'Record expires\s?[.]*:?\s*?(?P.+)', + 'Expires\s?[.]*:?\s*?(?P.+)', + 'Expire Date\s?[.]*:?\s*?(?P.+)', + 'Expired\s?[.]*:?\s*?(?P.+)', + 'Domain Expiration Date\s?[.]*:?\s*?(?P.+)', + 'paid-till:\s*(?P.+)', + 'expiration_date:\s*(?P.+)', + 'expire-date:\s*(?P.+)', + 'renewal:\s*(?P.+)', + 'expire:\s*(?P.+)'], + 'updated_date': ['\[Last Updated\]\s*(?P.+)', + 'Record modified on[.]*: (?P.+) [a-zA-Z]+', + 'Record last updated on[.]*: [a-zA-Z]+, (?P.+)', + 'Updated Date:\s?(?P.+)', + 'Updated date\s*:\s?(?P.+)', + # 'Database last updated on\s?[.]*:?\s*?(?P.+)\s[a-z]+\.?', + 'Record last updated on\s?[.]*:?\s?(?P.+)\.', + 'Domain record last updated\s?[.]*:\s*?(?P.+)', + 'Domain Last Updated\s?[.]*:\s*?(?P.+)', + 'Last updated on:\s?(?P.+)', + 'Date Modified\s?[.]*:\s?(?P.+)', + 'Last Modified\s?[.]*:\s?(?P.+)', + 'Domain Last Updated Date\s?[.]*:\s?(?P.+)', + 'Record last updated\s?[.]*:\s?(?P.+)', + 'Modified\s?[.]*:\s?(?P.+)', + '(C|c)hanged:\s*(?P.+)', + 'last_update:\s*(?P.+)', + 'Last Update\s?[.]*:\s?(?P.+)', + 'Last updated on (?P.+) [a-z]{3,4}', + 'Last updated:\s*(?P.+)', + 'last-updated:\s*(?P.+)', + '\[Last Update\]\s*(?P.+) \([A-Z]+\)'], + 'registrar': ['registrar:\s*(?P.+)', + 'Registrar:\s*(?P.+)', + 'Sponsoring Registrar Organization:\s*(?P.+)', + 'Registered through:\s?(?P.+)', + 'Registrar Name[.]*:\s?(?P.+)', + 'Record maintained by:\s?(?P.+)', + 'Registration Service Provided By:\s?(?P.+)', + 'Registrar of Record:\s?(?P.+)', + 'Domain Registrar :\s?(?P.+)', + 'Registration Service Provider: (?P.+)', + '\tName:\t\s(?P.+)'], + 'whois_server': ['Whois Server:\s?(?P.+)', + 'Registrar Whois:\s?(?P.+)'], + 'nameservers': ['Name Server:[ ]*(?P[^ ]+)', + 'Nameservers:[ ]*(?P[^ ]+)', + '(?<=[ 
.]{2})(?P([a-z0-9-]+\.)+[a-z0-9]+)(\s+([0-9]{1,3}\.){3}[0-9]{1,3})', + 'nameserver:\s*(?P.+)', + 'nserver:\s*(?P[^[\s]+)', + 'Name Server[.]+ (?P[^[\s]+)', + 'Hostname:\s*(?P[^\s]+)', + 'DNS[0-9]+:\s*(?P.+)', + ' DNS:\s*(?P.+)', + 'ns[0-9]+:\s*(?P.+)', + 'NS [0-9]+\s*:\s*(?P.+)', + '\[Name Server\]\s*(?P.+)', + '(?<=[ .]{2})(?P[a-z0-9-]+\.d?ns[0-9]*\.([a-z0-9-]+\.)+[a-z0-9]+)', + '(?<=[ .]{2})(?P([a-z0-9-]+\.)+[a-z0-9]+)(\s+([0-9]{1,3}\.){3}[0-9]{1,3})', + '(?<=[ .]{2})[^a-z0-9.-](?Pd?ns\.([a-z0-9-]+\.)+[a-z0-9]+)', + '^ *(?:Primary|Secondary|Third|Fourth) Server Hostname\.*: +(?P.+)$', + 'Nserver:\s*(?P.+)'], + 'emails': ['(?P[\w.-]+@[\w.-]+\.[\w]{2,6})', # Really need to fix this, much longer TLDs now exist... + '(?P[\w.-]+\sAT\s[\w.-]+\sDOT\s[\w]{2,6})'] + }, + "_dateformats": ( + '(?P[0-9]{1,2})[./ -](?PJan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[./ -](?P[0-9]{4}|[0-9]{2})' + '(\s+(?P[0-9]{1,2})[:.](?P[0-9]{1,2})[:.](?P[0-9]{1,2}))?', + '[a-z]{3}\s(?PJan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[./ -](?P[0-9]{1,2})(\s+(?P[0-9]{1,2})[:.](?P[0-9]{1,2})[:.](?P[0-9]{1,2}))?\s[a-z]{3}\s(?P[0-9]{4}|[0-9]{2})', + '[a-zA-Z]+\s(?P[0-9]{1,2})(?:st|nd|rd|th)\s(?PJan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec|January|February|March|April|May|June|July|August|September|October|November|December)\s(?P[0-9]{4})', + '(?P[0-9]{4})[./-]?(?P[0-9]{2})[./-]?(?P[0-9]{2})(\s|T|/)((?P[0-9]{1,2})[:.-](?P[0-9]{1,2})[:.-](?P[0-9]{1,2}))', + '(?P[0-9]{4})[./-](?P[0-9]{1,2})[./-](?P[0-9]{1,2})', + '(?P[0-9]{1,2})[./ -](?P[0-9]{1,2})[./ -](?P[0-9]{4}|[0-9]{2})', + '(?PJan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (?P[0-9]{1,2}),? 
(?P[0-9]{4})', + '(?P[0-9]{1,2})-(?PJanuary|February|March|April|May|June|July|August|September|October|November|December)-(?P[0-9]{4})', + ), + "_months": { + 'jan': 1, + 'january': 1, + 'feb': 2, + 'february': 2, + 'mar': 3, + 'march': 3, + 'apr': 4, + 'april': 4, + 'may': 5, + 'jun': 6, + 'june': 6, + 'jul': 7, + 'july': 7, + 'aug': 8, + 'august': 8, + 'sep': 9, + 'sept': 9, + 'september': 9, + 'oct': 10, + 'october': 10, + 'nov': 11, + 'november': 11, + 'dec': 12, + 'december': 12 + } } + # Regex modification utilities def preprocess_regex(regex): - # Fix for #2; prevents a ridiculous amount of varying size permutations. - regex = re.sub(r"\\s\*\(\?P<([^>]+)>\.\+\)", r"\s*(?P<\1>\S.*)", regex) - # Experimental fix for #18; removes unnecessary variable-size whitespace - # matching, since we're stripping results anyway. - regex = re.sub(r"\[ \]\*\(\?P<([^>]+)>\.\*\)", r"(?P<\1>.*)", regex) - return regex + # Fix for #2; prevents a ridiculous amount of varying size permutations. + regex = re.sub(r"\\s\*\(\?P<([^>]+)>\.\+\)", r"\s*(?P<\1>\S.*)", regex) + # Experimental fix for #18; removes unnecessary variable-size whitespace + # matching, since we're stripping results anyway. + regex = re.sub(r"\[ \]\*\(\?P<([^>]+)>\.\*\)", r"(?P<\1>.*)", regex) + return regex + def dotify(string): - return "".join([char + r"\.?" for char in string]) + return "".join([char + r"\.?" 
for char in string]) + def commaify_dict(source): - return dict((key + ",", regex.replace("$", ",$")) for (key, regex) in source.items()) + return dict((key + ",", regex.replace("$", ",$")) for (key, regex) in source.items()) + def allow_trailing_comma_dict(regexes): - combined_dict = dict() - combined_dict.update(regexes) - combined_dict.update(commaify_dict(regexes)) - return combined_dict + combined_dict = dict() + combined_dict.update(regexes) + combined_dict.update(commaify_dict(regexes)) + return combined_dict + registrant_regexes = [ - " Registrant:[ ]*\n (?P.*)\n (?P.*)\n (?P.*)\n (?P.*), (?P.*) (?P.*)\n (?P.*)\n(?: Phone: (?P.*)\n)? Email: (?P.*)\n", # Corporate Domains, Inc. - "Registrant:\n (?P.+)\n (?P.+)\n(?: (?P.*)\n)?(?: (?P.*)\n)? (?P.+), (?P.+)\n (?P.+)\n (?P.+)\n (?P.+)\n\n", # OVH - "(?:Registrant ID:(?P.+)\n)?Registrant Name:(?P.*)\n(?:Registrant Organization:(?P.*)\n)?Registrant Street1?:(?P.*)\n(?:Registrant Street2:(?P.*)\n)?(?:Registrant Street3:(?P.*)\n)?Registrant City:(?P.*)\nRegistrant State/Province:(?P.*)\nRegistrant Postal Code:(?P.*)\nRegistrant Country:(?P.*)\nRegistrant Phone:(?P.*)\n(?:Registrant Phone Ext.:(?P.*)\n)?(?:Registrant FAX:(?P.*)\n)?(?:Registrant FAX Ext.:(?P.*)\n)?Registrant Email:(?P.*)", # Public Interest Registry (.org), nic.pw, No-IP.com - "Registrant ID:(?P.+)\nRegistrant Name:(?P.*)\n(?:Registrant Organization:(?P.*)\n)?Registrant Address1?:(?P.*)\n(?:Registrant Address2:(?P.*)\n)?(?:Registrant Address3:(?P.*)\n)?Registrant City:(?P.*)\nRegistrant State/Province:(?P.*)\nRegistrant Country/Economy:(?P.*)\nRegistrant Postal Code:(?P.*)\nRegistrant Phone:(?P.*)\n(?:Registrant Phone Ext.:(?P.*)\n)?(?:Registrant FAX:(?P.*)\n)?(?:Registrant FAX Ext.:(?P.*)\n)?Registrant E-mail:(?P.*)", # .ME, DotAsia - "Registrant ID:\s*(?P.+)\nRegistrant Name:\s*(?P.+)\nRegistrant Organization:\s*(?P.*)\nRegistrant Address1:\s*(?P.+)\nRegistrant Address2:\s*(?P.*)\nRegistrant City:\s*(?P.+)\nRegistrant 
State/Province:\s*(?P.+)\nRegistrant Postal Code:\s*(?P.+)\nRegistrant Country:\s*(?P.+)\nRegistrant Country Code:\s*(?P.+)\nRegistrant Phone Number:\s*(?P.+)\nRegistrant Email:\s*(?P.+)\n", # .CO Internet - "Registrant Contact: (?P.+)\nRegistrant Organization: (?P.+)\nRegistrant Name: (?P.+)\nRegistrant Street: (?P.+)\nRegistrant City: (?P.+)\nRegistrant Postal Code: (?P.+)\nRegistrant State: (?P.+)\nRegistrant Country: (?P.+)\nRegistrant Phone: (?P.*)\nRegistrant Phone Ext: (?P.*)\nRegistrant Fax: (?P.*)\nRegistrant Fax Ext: (?P.*)\nRegistrant Email: (?P.*)\n", # Key-Systems GmbH - "(?:Registrant ID:[ ]*(?P.*)\n)?Registrant Name:[ ]*(?P.*)\n(?:Registrant Organization:[ ]*(?P.*)\n)?Registrant Street:[ ]*(?P.+)\n(?:Registrant Street:[ ]*(?P.+)\n)?(?:Registrant Street:[ ]*(?P.+)\n)?Registrant City:[ ]*(?P.+)\nRegistrant State(?:\/Province)?:[ ]*(?P.*)\nRegistrant Postal Code:[ ]*(?P.+)\nRegistrant Country:[ ]*(?P.+)\n(?:Registrant Phone:[ ]*(?P.*)\n)?(?:Registrant Phone Ext:[ ]*(?P.*)\n)?(?:Registrant Fax:[ ]*(?P.*)\n)?(?:Registrant Fax Ext:[ ]*(?P.*)\n)?(?:Registrant Email:[ ]*(?P.+)\n)?", # WildWestDomains, GoDaddy, Namecheap/eNom, Ascio, Musedoma (.museum), EuroDNS, nic.ps - "Registrant\n(?: (?P.+)\n)? (?P.+)\n Email:(?P.+)\n (?P.+)\n(?: (?P.+)\n)? (?P.+) (?P.+)\n (?P.+)\n Tel: (?P.+)\n\n", # internet.bs - " Registrant Contact Details:[ ]*\n (?P.*)\n (?P.*)[ ]{2,}\((?P.*)\)\n (?P.*)\n(?: (?P.*)\n)?(?: (?P.*)\n)? (?P.*)\n (?P.*),(?P.*)\n (?P.*)\n Tel. (?P.*)", # Whois.com - "owner-id:[ ]*(?P.*)\n(?:owner-organization:[ ]*(?P.*)\n)?owner-name:[ ]*(?P.*)\nowner-street:[ ]*(?P.*)\nowner-city:[ ]*(?P.*)\nowner-zip:[ ]*(?P.*)\nowner-country:[ ]*(?P.*)\n(?:owner-phone:[ ]*(?P.*)\n)?(?:owner-fax:[ ]*(?P.*)\n)?owner-email:[ ]*(?P.*)", # InterNetworX - "Registrant:\n registrant_org: (?P.*)\n registrant_name: (?P.*)\n registrant_email: (?P.*)\n registrant_address: (?P
.*)\n registrant_city: (?P.*)\n registrant_state: (?P.*)\n registrant_zip: (?P.*)\n registrant_country: (?P.*)\n registrant_phone: (?P.*)", # Bellnames - "Holder of domain name:\n(?P[\S\s]+)\n(?P.+)\n(?P[A-Z0-9-]+)\s+(?P.+)\n(?P.+)\nContractual Language", # nic.ch - "\n\n(?:Owner)?\s+: (?P.*)\n(?:\s+: (?P.*)\n)?\s+: (?P.*)\n\s+: (?P.*)\n\s+: (?P.*)\n\s+: (?P.*)\n", # nic.io - "Contact Information:\n\[Name\]\s*(?P.*)\n\[Email\]\s*(?P.*)\n\[Web Page\]\s*(?P.*)\n\[Postal code\]\s*(?P.*)\n\[Postal Address\]\s*(?P.*)\n(?:\s+(?P.*)\n)?(?:\s+(?P.*)\n)?\[Phone\]\s*(?P.*)\n\[Fax\]\s*(?P.*)\n", # jprs.jp - "g\. \[Organization\] (?P.+)\n", # .co.jp registrations at jprs.jp - "Registrant ID:(?P.*)\nRegistrant Name:(?P.*)\n(?:Registrant Organization:(?P.*)\n)?Registrant Address1:(?P.*)\n(?:Registrant Address2:(?P.*)\n)?(?:Registrant Address3:(?P.*)\n)?Registrant City:(?P.*)\n(?:Registrant State/Province:(?P.*)\n)?(?:Registrant Postal Code:(?P.*)\n)?Registrant Country:(?P.*)\nRegistrant Country Code:.*\nRegistrant Phone Number:(?P.*)\n(?:Registrant Facsimile Number:(?P.*)\n)?Registrant Email:(?P.*)", # .US, .biz (NeuStar), .buzz, .moe (Interlink Co. Ltd.) - "Registrant\n Name: (?P.+)\n(?: Organization: (?P.+)\n)? ContactID: (?P.+)\n(?: Address: (?P.+)\n(?: (?P.+)\n(?: (?P.+)\n)?)? (?P.+)\n (?P.+)\n (?P.+)\n (?P.+)\n)?(?: Created: (?P.+)\n)?(?: Last Update: (?P.+)\n)?", # nic.it - " Organisation Name[.]* (?P.*)\n Organisation Address[.]* (?P.*)\n Organisation Address[.]* (?P.*)\n(?: Organisation Address[.]* (?P.*)\n)? Organisation Address[.]* (?P.*)\n Organisation Address[.]* (?P.*)\n Organisation Address[.]* (?P.*)\n Organisation Address[.]* (?P.*)", # Melbourne IT (what a horrid format...) 
- "Registrant:[ ]*(?P.+)\n[\s\S]*Eligibility Name:[ ]*(?P.+)\n[\s\S]*Registrant Contact ID:[ ]*(?P.+)\n", # .au business - "Eligibility Type:[ ]*Citizen\/Resident\n[\s\S]*Registrant Contact ID:[ ]*(?P.+)\n[\s\S]*Registrant Contact Name:[ ]*(?P.+)\n", # .au individual - "Registrant:[ ]*(?P.+)\n[\s\S]*Eligibility Type:[ ]*(Higher Education Institution|Company|Incorporated Association|Other)\n[\s\S]*Registrant Contact ID:[ ]*(?P.+)\n[\s\S]*Registrant Contact Name:[ ]*(?P.+)\n", # .au educational, company, 'incorporated association' (non-profit?), other (spotted for linux.conf.au, unsure if also for others) - " Registrant:\n (?P.+)\n\n(?: Trading as:\s?\n (?P.+)\n\n)?[\s\S]* Registrant type:\n .*\n\n Registrant's address:\n The registrant .* opted to have", # Nominet (.uk) with hidden address - " Registrant:\n (?P.+)\n\n(?: Trading as:\s?\n (?P.+)\n\n)?[\s\S]* Registrant type:\n .*\n\n Registrant's address:\n (?P.+)\n(?: (?P.+)\n(?: (?P.+)\n)??)?? (?P[^0-9\n]+)\n(?: (?P.+)\n)? (?P.+)\n (?P.+)\n\n", # Nominet (.uk) with visible address - "Domain Owner:\n\t(?P.+)\n\n[\s\S]*?(?:Registrant Contact:\n\t(?P.+))?\n\nRegistrant(?:'s)? (?:a|A)ddress:(?:\n\t(?P.+)\n(?:\t(?P.+)\n)?(?:\t(?P.+)\n)?\t(?P.+)\n\t(?P.+))?\n\t(?P.+)(?:\n\t(?P.+) \(Phone\)\n\t(?P.+) \(FAX\)\n\t(?P.+))?\n\n", # .ac.uk - what a mess... 
- "Registrant ID: (?P.+)\nRegistrant: (?P.+)\nRegistrant Contact Email: (?P.+)", # .cn (CNNIC) - "Registrant contact:\n (?P.+)\n (?P.*)\n (?P.+), (?P.+) (?P.+) (?P.+)\n\n", # Fabulous.com - "registrant-name:\s*(?P.+)\n(?:registrant-organization:\s*(?P.*)\n)?registrant-type:\s*(?P.+)\nregistrant-address:\s*(?P.+)\nregistrant-postcode:\s*(?P.+)\nregistrant-city:\s*(?P.+)\nregistrant-country:\s*(?P.+)\n(?:registrant-phone:\s*(?P.+)\n)?(?:registrant-email:\s*(?P.+)\n)?", # Hetzner - "Registrant Contact Information :[ ]*\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n\n", # GAL Communication - "Contact Information : For Customer # [0-9]+[ ]*\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n\n", # GAL Communication alternative (private WHOIS) format? - "Registrant:\n Name: (?P.+)\n City: (?P.+)\n State: (?P.+)\n Country: (?P.+)\n", # Akky (.com.mx) - " Registrant:\n (?P.+)\n (?P.+)\n (?P.+) (?P\S+),[ ]+(?P.+)\n (?P.+)", # .am - "Domain Holder: (?P.+)\n(?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?))?)?)?)?)?)?, (?P[^.,]+), (?P.+), (?P.+)\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 1 - "Domain Holder: (?P.+)\n(?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?))?)?)?)?)?)?, (?P.+)\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 2 - "Domain Holder: (?P.+)\n(?P.+)\n(?:(?P.+)\n)?(?:(?P.+)\n)?.+?, (?P.+)\n(?P.+)\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 3 - "Domain Holder: (?P.+)\n(?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?))?)?)?)?)?)?\n(?P.+),? 
(?P[A-Z]{2,3})(?: [A-Z0-9]+)?\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 4 - " Registrant:\n (?P.+)\n (?P.+) (?P.+)\n (?P.*)\n (?P.*)\n (?P.*)\n (?P.+), (?P[^,\n]*)\n (?P.+)\n", # .com.tw (Western registrars) - "Registrant:\n(?P.+)\n(?P.+)\n(?P.+?)(?:,+(?P.+?)(?:,+(?P.+?)(?:,+(?P.+?)(?:,+(?P.+?)(?:,+(?P.+?)(?:,+(?P.+?))?)?)?)?)?)?,(?P.+),(?P.+)\n\n Contact:\n (?P.+) (?P.+)\n TEL: (?P.+?)(?:(?:#|ext.?)(?P.+))?\n FAX: (?P.+)(?:(?:#|ext.?)(?P.+))?\n", # .com.tw (TWNIC/SEEDNET, Taiwanese companies only?) - "Registrant Contact Information:\n\nCompany English Name \(It should be the same as the registered/corporation name on your Business Register Certificate or relevant documents\):(?P.+)\nCompany Chinese name:(?P.+)\nAddress: (?P.+)\nCountry: (?P.+)\nEmail: (?P.+)\n", # HKDNR (.hk) - "Registrant ID:(?P.+)\nRegistrant Name:(?P.*)\n(?:Registrant Organization:(?P.*)\n)?Registrant Street1:(?P.+?)\n(?:Registrant Street2:(?P.+?)\n(?:Registrant Street3:(?P.+?)\n)?)?Registrant City:(?P.+)\nRegistrant State:(?P.*)\nRegistrant Postal Code:(?P.+)\nRegistrant Country:(?P[A-Z]+)\nRegistrant Phone:(?P.*?)\nRegistrant Fax:(?P.*)\nRegistrant Email:(?P.+)\n", # Realtime Register - "Organization Using Domain Name\n Organization Name\.+:(?P.*)\n Street Address\.+:(?P.*)\n City\.+:(?P.*)\n State\.+:(?P.*)\n Postal Code\.+:(?P.*)\n Country\.+:(?P.*)", # .ai - "owner:\s+(?P.+)", # .br - "person:\s+(?P.+)", # nic.ru (person) - "org:\s+(?P.+)", # nic.ru (organization) + " Registrant:[ ]*\n (?P.*)\n (?P.*)\n (?P.*)\n (?P.*), (?P.*) (?P.*)\n (?P.*)\n(?: Phone: (?P.*)\n)? Email: (?P.*)\n", + # Corporate Domains, Inc. + "Registrant:\n (?P.+)\n (?P.+)\n(?: (?P.*)\n)?(?: (?P.*)\n)? 
(?P.+), (?P.+)\n (?P.+)\n (?P.+)\n (?P.+)\n\n", + # OVH + "(?:Registrant ID:(?P.+)\n)?Registrant Name:(?P.*)\n(?:Registrant Organization:(?P.*)\n)?Registrant Street1?:(?P.*)\n(?:Registrant Street2:(?P.*)\n)?(?:Registrant Street3:(?P.*)\n)?Registrant City:(?P.*)\nRegistrant State/Province:(?P.*)\nRegistrant Postal Code:(?P.*)\nRegistrant Country:(?P.*)\nRegistrant Phone:(?P.*)\n(?:Registrant Phone Ext.:(?P.*)\n)?(?:Registrant FAX:(?P.*)\n)?(?:Registrant FAX Ext.:(?P.*)\n)?Registrant Email:(?P.*)", + # Public Interest Registry (.org), nic.pw, No-IP.com + "Registrant ID:(?P.+)\nRegistrant Name:(?P.*)\n(?:Registrant Organization:(?P.*)\n)?Registrant Address1?:(?P.*)\n(?:Registrant Address2:(?P.*)\n)?(?:Registrant Address3:(?P.*)\n)?Registrant City:(?P.*)\nRegistrant State/Province:(?P.*)\nRegistrant Country/Economy:(?P.*)\nRegistrant Postal Code:(?P.*)\nRegistrant Phone:(?P.*)\n(?:Registrant Phone Ext.:(?P.*)\n)?(?:Registrant FAX:(?P.*)\n)?(?:Registrant FAX Ext.:(?P.*)\n)?Registrant E-mail:(?P.*)", + # .ME, DotAsia + "Registrant ID:\s*(?P.+)\nRegistrant Name:\s*(?P.+)\nRegistrant Organization:\s*(?P.*)\nRegistrant Address1:\s*(?P.+)\nRegistrant Address2:\s*(?P.*)\nRegistrant City:\s*(?P.+)\nRegistrant State/Province:\s*(?P.+)\nRegistrant Postal Code:\s*(?P.+)\nRegistrant Country:\s*(?P.+)\nRegistrant Country Code:\s*(?P.+)\nRegistrant Phone Number:\s*(?P.+)\nRegistrant Email:\s*(?P.+)\n", + # .CO Internet + "Registrant Contact: (?P.+)\nRegistrant Organization: (?P.+)\nRegistrant Name: (?P.+)\nRegistrant Street: (?P.+)\nRegistrant City: (?P.+)\nRegistrant Postal Code: (?P.+)\nRegistrant State: (?P.+)\nRegistrant Country: (?P.+)\nRegistrant Phone: (?P.*)\nRegistrant Phone Ext: (?P.*)\nRegistrant Fax: (?P.*)\nRegistrant Fax Ext: (?P.*)\nRegistrant Email: (?P.*)\n", + # Key-Systems GmbH + "(?:Registrant ID:[ ]*(?P.*)\n)?Registrant Name:[ ]*(?P.*)\n(?:Registrant Organization:[ ]*(?P.*)\n)?Registrant Street:[ ]*(?P.+)\n(?:Registrant Street:[ ]*(?P.+)\n)?(?:Registrant 
Street:[ ]*(?P.+)\n)?Registrant City:[ ]*(?P.+)\nRegistrant State(?:\/Province)?:[ ]*(?P.*)\nRegistrant Postal Code:[ ]*(?P.+)\nRegistrant Country:[ ]*(?P.+)\n(?:Registrant Phone:[ ]*(?P.*)\n)?(?:Registrant Phone Ext:[ ]*(?P.*)\n)?(?:Registrant Fax:[ ]*(?P.*)\n)?(?:Registrant Fax Ext:[ ]*(?P.*)\n)?(?:Registrant Email:[ ]*(?P.+)\n)?", + # WildWestDomains, GoDaddy, Namecheap/eNom, Ascio, Musedoma (.museum), EuroDNS, nic.ps + "Registrant\n(?: (?P.+)\n)? (?P.+)\n Email:(?P.+)\n (?P.+)\n(?: (?P.+)\n)? (?P.+) (?P.+)\n (?P.+)\n Tel: (?P.+)\n\n", + # internet.bs + " Registrant Contact Details:[ ]*\n (?P.*)\n (?P.*)[ ]{2,}\((?P.*)\)\n (?P.*)\n(?: (?P.*)\n)?(?: (?P.*)\n)? (?P.*)\n (?P.*),(?P.*)\n (?P.*)\n Tel. (?P.*)", + # Whois.com + "owner-id:[ ]*(?P.*)\n(?:owner-organization:[ ]*(?P.*)\n)?owner-name:[ ]*(?P.*)\nowner-street:[ ]*(?P.*)\nowner-city:[ ]*(?P.*)\nowner-zip:[ ]*(?P.*)\nowner-country:[ ]*(?P.*)\n(?:owner-phone:[ ]*(?P.*)\n)?(?:owner-fax:[ ]*(?P.*)\n)?owner-email:[ ]*(?P.*)", + # InterNetworX + "Registrant:\n registrant_org: (?P.*)\n registrant_name: (?P.*)\n registrant_email: (?P.*)\n registrant_address: (?P
.*)\n registrant_city: (?P.*)\n registrant_state: (?P.*)\n registrant_zip: (?P.*)\n registrant_country: (?P.*)\n registrant_phone: (?P.*)", + # Bellnames + "Holder of domain name:\n(?P[\S\s]+)\n(?P.+)\n(?P[A-Z0-9-]+)\s+(?P.+)\n(?P.+)\nContractual Language", + # nic.ch + "\n\n(?:Owner)?\s+: (?P.*)\n(?:\s+: (?P.*)\n)?\s+: (?P.*)\n\s+: (?P.*)\n\s+: (?P.*)\n\s+: (?P.*)\n", + # nic.io + "Contact Information:\n\[Name\]\s*(?P.*)\n\[Email\]\s*(?P.*)\n\[Web Page\]\s*(?P.*)\n\[Postal code\]\s*(?P.*)\n\[Postal Address\]\s*(?P.*)\n(?:\s+(?P.*)\n)?(?:\s+(?P.*)\n)?\[Phone\]\s*(?P.*)\n\[Fax\]\s*(?P.*)\n", + # jprs.jp + "g\. \[Organization\] (?P.+)\n", # .co.jp registrations at jprs.jp + "Registrant ID:(?P.*)\nRegistrant Name:(?P.*)\n(?:Registrant Organization:(?P.*)\n)?Registrant Address1:(?P.*)\n(?:Registrant Address2:(?P.*)\n)?(?:Registrant Address3:(?P.*)\n)?Registrant City:(?P.*)\n(?:Registrant State/Province:(?P.*)\n)?(?:Registrant Postal Code:(?P.*)\n)?Registrant Country:(?P.*)\nRegistrant Country Code:.*\nRegistrant Phone Number:(?P.*)\n(?:Registrant Facsimile Number:(?P.*)\n)?Registrant Email:(?P.*)", + # .US, .biz (NeuStar), .buzz, .moe (Interlink Co. Ltd.) + "Registrant\n Name: (?P.+)\n(?: Organization: (?P.+)\n)? ContactID: (?P.+)\n(?: Address: (?P.+)\n(?: (?P.+)\n(?: (?P.+)\n)?)? (?P.+)\n (?P.+)\n (?P.+)\n (?P.+)\n)?(?: Created: (?P.+)\n)?(?: Last Update: (?P.+)\n)?", + # nic.it + " Organisation Name[.]* (?P.*)\n Organisation Address[.]* (?P.*)\n Organisation Address[.]* (?P.*)\n(?: Organisation Address[.]* (?P.*)\n)? Organisation Address[.]* (?P.*)\n Organisation Address[.]* (?P.*)\n Organisation Address[.]* (?P.*)\n Organisation Address[.]* (?P.*)", + # Melbourne IT (what a horrid format...) 
+ "Registrant:[ ]*(?P.+)\n[\s\S]*Eligibility Name:[ ]*(?P.+)\n[\s\S]*Registrant Contact ID:[ ]*(?P.+)\n", + # .au business + "Eligibility Type:[ ]*Citizen\/Resident\n[\s\S]*Registrant Contact ID:[ ]*(?P.+)\n[\s\S]*Registrant Contact Name:[ ]*(?P.+)\n", + # .au individual + "Registrant:[ ]*(?P.+)\n[\s\S]*Eligibility Type:[ ]*(Higher Education Institution|Company|Incorporated Association|Other)\n[\s\S]*Registrant Contact ID:[ ]*(?P.+)\n[\s\S]*Registrant Contact Name:[ ]*(?P.+)\n", + # .au educational, company, 'incorporated association' (non-profit?), other (spotted for linux.conf.au, unsure if also for others) + " Registrant:\n (?P.+)\n\n(?: Trading as:\s?\n (?P.+)\n\n)?[\s\S]* Registrant type:\n .*\n\n Registrant's address:\n The registrant .* opted to have", + # Nominet (.uk) with hidden address + " Registrant:\n (?P.+)\n\n(?: Trading as:\s?\n (?P.+)\n\n)?[\s\S]* Registrant type:\n .*\n\n Registrant's address:\n (?P.+)\n(?: (?P.+)\n(?: (?P.+)\n)??)?? (?P[^0-9\n]+)\n(?: (?P.+)\n)? (?P.+)\n (?P.+)\n\n", + # Nominet (.uk) with visible address + "Domain Owner:\n\t(?P.+)\n\n[\s\S]*?(?:Registrant Contact:\n\t(?P.+))?\n\nRegistrant(?:'s)? (?:a|A)ddress:(?:\n\t(?P.+)\n(?:\t(?P.+)\n)?(?:\t(?P.+)\n)?\t(?P.+)\n\t(?P.+))?\n\t(?P.+)(?:\n\t(?P.+) \(Phone\)\n\t(?P.+) \(FAX\)\n\t(?P.+))?\n\n", + # .ac.uk - what a mess... 
+ "Registrant ID: (?P.+)\nRegistrant: (?P.+)\nRegistrant Contact Email: (?P.+)", # .cn (CNNIC) + "Registrant contact:\n (?P.+)\n (?P.*)\n (?P.+), (?P.+) (?P.+) (?P.+)\n\n", + # Fabulous.com + "registrant-name:\s*(?P.+)\n(?:registrant-organization:\s*(?P.*)\n)?registrant-type:\s*(?P.+)\nregistrant-address:\s*(?P.+)\nregistrant-postcode:\s*(?P.+)\nregistrant-city:\s*(?P.+)\nregistrant-country:\s*(?P.+)\n(?:registrant-phone:\s*(?P.+)\n)?(?:registrant-email:\s*(?P.+)\n)?", + # Hetzner + "Registrant Contact Information :[ ]*\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n\n", + # GAL Communication + "Contact Information : For Customer # [0-9]+[ ]*\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n\n", + # GAL Communication alternative (private WHOIS) format? + "Registrant:\n Name: (?P.+)\n City: (?P.+)\n State: (?P.+)\n Country: (?P.+)\n", + # Akky (.com.mx) + " Registrant:\n (?P.+)\n (?P.+)\n (?P.+) (?P\S+),[ ]+(?P.+)\n (?P.+)", + # .am + "Domain Holder: (?P.+)\n(?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?))?)?)?)?)?)?, (?P[^.,]+), (?P.+), (?P.+)\n(?P.+)\n(?P[A-Z]+)\n", + # .co.th, format 1 + "Domain Holder: (?P.+)\n(?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?))?)?)?)?)?)?, (?P.+)\n(?P.+)\n(?P[A-Z]+)\n", + # .co.th, format 2 + "Domain Holder: (?P.+)\n(?P.+)\n(?:(?P.+)\n)?(?:(?P.+)\n)?.+?, (?P.+)\n(?P.+)\n(?P.+)\n(?P[A-Z]+)\n", + # .co.th, format 3 + "Domain Holder: (?P.+)\n(?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?))?)?)?)?)?)?\n(?P.+),? 
(?P[A-Z]{2,3})(?: [A-Z0-9]+)?\n(?P.+)\n(?P[A-Z]+)\n", + # .co.th, format 4 + " Registrant:\n (?P.+)\n (?P.+) (?P.+)\n (?P.*)\n (?P.*)\n (?P.*)\n (?P.+), (?P[^,\n]*)\n (?P.+)\n", + # .com.tw (Western registrars) + "Registrant:\n(?P.+)\n(?P.+)\n(?P.+?)(?:,+(?P.+?)(?:,+(?P.+?)(?:,+(?P.+?)(?:,+(?P.+?)(?:,+(?P.+?)(?:,+(?P.+?))?)?)?)?)?)?,(?P.+),(?P.+)\n\n Contact:\n (?P.+) (?P.+)\n TEL: (?P.+?)(?:(?:#|ext.?)(?P.+))?\n FAX: (?P.+)(?:(?:#|ext.?)(?P.+))?\n", + # .com.tw (TWNIC/SEEDNET, Taiwanese companies only?) + "Registrant Contact Information:\n\nCompany English Name \(It should be the same as the registered/corporation name on your Business Register Certificate or relevant documents\):(?P.+)\nCompany Chinese name:(?P.+)\nAddress: (?P.+)\nCountry: (?P.+)\nEmail: (?P.+)\n", + # HKDNR (.hk) + "Registrant ID:(?P.+)\nRegistrant Name:(?P.*)\n(?:Registrant Organization:(?P.*)\n)?Registrant Street1:(?P.+?)\n(?:Registrant Street2:(?P.+?)\n(?:Registrant Street3:(?P.+?)\n)?)?Registrant City:(?P.+)\nRegistrant State:(?P.*)\nRegistrant Postal Code:(?P.+)\nRegistrant Country:(?P[A-Z]+)\nRegistrant Phone:(?P.*?)\nRegistrant Fax:(?P.*)\nRegistrant Email:(?P.+)\n", + # Realtime Register + "Organization Using Domain Name\n Organization Name\.+:(?P.*)\n Street Address\.+:(?P.*)\n City\.+:(?P.*)\n State\.+:(?P.*)\n Postal Code\.+:(?P.*)\n Country\.+:(?P.*)", + # .ai + "owner:\s+(?P.+)", # .br + "person:\s+(?P.+)", # nic.ru (person) + "org:\s+(?P.+)", # nic.ru (organization) ] tech_contact_regexes = [ - " Technical Contact:[ ]*\n (?P.*)\n (?P.*)\n (?P.*)\n (?P.*), (?P.*) (?P.*)\n (?P.*)\n(?: Phone: (?P.*)\n)? Email: (?P.*)\n", # Corporate Domains, Inc. - "Technical Contact:\n (?P.+)\n (?P.+)\n(?: (?P.*)\n)?(?: (?P.*)\n)? 
(?P.+), (?P.+)\n (?P.+)\n (?P.+)\n (?P.+)\n\n", # OVH - "(?:Tech ID:(?P.+)\n)?Tech Name:(?P.*)\n(:?Tech Organization:(?P.*)\n)?Tech Street1?:(?P.*)\n(?:Tech Street2:(?P.*)\n)?(?:Tech Street3:(?P.*)\n)?Tech City:(?P.*)\nTech State/Province:(?P.*)\nTech Postal Code:(?P.*)\nTech Country:(?P.*)\nTech Phone:(?P.*)\n(?:Tech Phone Ext.:(?P.*)\n)?(?:Tech FAX:(?P.*)\n)?(?:Tech FAX Ext.:(?P.*)\n)?Tech Email:(?P.*)", # Public Interest Registry (.org), nic.pw, No-IP.com - "Tech(?:nical)? ID:(?P.+)\nTech(?:nical)? Name:(?P.*)\n(?:Tech(?:nical)? Organization:(?P.*)\n)?Tech(?:nical)? Address1?:(?P.*)\n(?:Tech(?:nical)? Address2:(?P.*)\n)?(?:Tech(?:nical)? Address3:(?P.*)\n)?Tech(?:nical)? City:(?P.*)\nTech(?:nical)? State/Province:(?P.*)\nTech(?:nical)? Country/Economy:(?P.*)\nTech(?:nical)? Postal Code:(?P.*)\nTech(?:nical)? Phone:(?P.*)\n(?:Tech(?:nical)? Phone Ext.:(?P.*)\n)?(?:Tech(?:nical)? FAX:(?P.*)\n)?(?:Tech(?:nical)? FAX Ext.:(?P.*)\n)?Tech(?:nical)? E-mail:(?P.*)", # .ME, DotAsia - "Technical Contact ID:\s*(?P.+)\nTechnical Contact Name:\s*(?P.+)\nTechnical Contact Organization:\s*(?P.*)\nTechnical Contact Address1:\s*(?P.+)\nTechnical Contact Address2:\s*(?P.*)\nTechnical Contact City:\s*(?P.+)\nTechnical Contact State/Province:\s*(?P.+)\nTechnical Contact Postal Code:\s*(?P.+)\nTechnical Contact Country:\s*(?P.+)\nTechnical Contact Country Code:\s*(?P.+)\nTechnical Contact Phone Number:\s*(?P.+)\nTechnical Contact Email:\s*(?P.+)\n", # .CO Internet - "Tech Contact: (?P.+)\nTech Organization: (?P.+)\nTech Name: (?P.+)\nTech Street: (?P.+)\nTech City: (?P.+)\nTech Postal Code: (?P.+)\nTech State: (?P.+)\nTech Country: (?P.+)\nTech Phone: (?P.*)\nTech Phone Ext: (?P.*)\nTech Fax: (?P.*)\nTech Fax Ext: (?P.*)\nTech Email: (?P.*)\n", # Key-Systems GmbH - "(?:Tech ID:[ ]*(?P.*)\n)?Tech[ ]*Name:[ ]*(?P.*)\n(?:Tech[ ]*Organization:[ ]*(?P.*)\n)?Tech[ ]*Street:[ ]*(?P.+)\n(?:Tech[ ]*Street:[ ]*(?P.+)\n)?(?:Tech[ ]*Street:[ ]*(?P.+)\n)?Tech[ ]*City:[ ]*(?P.+)\nTech[ 
]*State(?:\/Province)?:[ ]*(?P.*)\nTech[ ]*Postal[ ]*Code:[ ]*(?P.+)\nTech[ ]*Country:[ ]*(?P.+)\n(?:Tech[ ]*Phone:[ ]*(?P.*)\n)?(?:Tech[ ]*Phone[ ]*Ext:[ ]*(?P.*)\n)?(?:Tech[ ]*Fax:[ ]*(?P.*)\n)?(?:Tech[ ]*Fax[ ]*Ext:\s*?(?P.*)\n)?(?:Tech[ ]*Email:[ ]*(?P.+)\n)?", # WildWestDomains, GoDaddy, Namecheap/eNom, Ascio, Musedoma (.museum), EuroDNS, nic.ps - "Technical Contact\n(?: (?P.+)\n)? (?P.+)\n Email:(?P.+)\n (?P.+)\n(?: (?P.+)\n)? (?P.+) (?P.+)\n (?P.+)\n Tel: (?P.+)\n\n", # internet.bs - " Technical Contact Details:[ ]*\n (?P.*)\n (?P.*)[ ]{2,}\((?P.*)\)\n (?P.*)\n(?: (?P.*)\n)?(?: (?P.*)\n)? (?P.*)\n (?P.*),(?P.*)\n (?P.*)\n Tel. (?P.*)", # Whois.com - "tech-id:[ ]*(?P.*)\n(?:tech-organization:[ ]*(?P.*)\n)?tech-name:[ ]*(?P.*)\ntech-street:[ ]*(?P.*)\ntech-city:[ ]*(?P.*)\ntech-zip:[ ]*(?P.*)\ntech-country:[ ]*(?P.*)\n(?:tech-phone:[ ]*(?P.*)\n)?(?:tech-fax:[ ]*(?P.*)\n)?tech-email:[ ]*(?P.*)", # InterNetworX - "Technical Contact:\n tech_org: (?P.*)\n tech_name: (?P.*)\n tech_email: (?P.*)\n tech_address: (?P
.*)\n tech_city: (?P.*)\n tech_state: (?P.*)\n tech_zip: (?P.*)\n tech_country: (?P.*)\n tech_phone: (?P.*)", # Bellnames - "Technical contact:\n(?P[\S\s]+)\n(?P.+)\n(?P[A-Z0-9-]+)\s+(?P.+)\n(?P.+)\n\n", # nic.ch - "Tech Contact ID:[ ]*(?P.+)\nTech Contact Name:[ ]*(?P.+)", # .au - "Technical Contact ID:(?P.*)\nTechnical Contact Name:(?P.*)\n(?:Technical Contact Organization:(?P.*)\n)?Technical Contact Address1:(?P.*)\n(?:Technical Contact Address2:(?P.*)\n)?(?:Technical Contact Address3:(?P.*)\n)?Technical Contact City:(?P.*)\n(?:Technical Contact State/Province:(?P.*)\n)?(?:Technical Contact Postal Code:(?P.*)\n)?Technical Contact Country:(?P.*)\nTechnical Contact Country Code:.*\nTechnical Contact Phone Number:(?P.*)\n(?:Technical Contact Facsimile Number:(?P.*)\n)?Technical Contact Email:(?P.*)", # .US, .biz (NeuStar), .buzz, .moe (Interlink Co. Ltd.) - "Technical Contacts\n Name: (?P.+)\n(?: Organization: (?P.+)\n)? ContactID: (?P.+)\n(?: Address: (?P.+)\n(?: (?P.+)\n(?: (?P.+)\n)?)? (?P.+)\n (?P.+)\n (?P.+)\n (?P.+)\n)?(?: Created: (?P.+)\n)?(?: Last Update: (?P.+)\n)?", # nic.it // NOTE: Why does this say 'Contacts'? Can it have multiple? - "Tech Name[.]* (?P.*)\n Tech Address[.]* (?P.*)\n Tech Address[.]* (?P.*)\n(?: Tech Address[.]* (?P.*)\n)? Tech Address[.]* (?P.*)\n Tech Address[.]* (?P.*)\n Tech Address[.]* (?P.*)\n Tech Address[.]* (?P.*)\n Tech Email[.]* (?P.*)\n Tech Phone[.]* (?P.*)\n Tech Fax[.]* (?P.*)", # Melbourne IT - "Technical contact:\n(?: (?P.+)\n)? 
(?P.+)\n (?P.+)\n (?P.+)\n (?P.+), (?P.+) (?P.+) (?P.+)\n Phone: (?P.*)\n Fax: (?P.*)\n", # Fabulous.com - "tech-c-name:\s*(?P.+)\n(?:tech-c-organization:\s*(?P.*)\n)?tech-c-type:\s*(?P.+)\ntech-c-address:\s*(?P.+)\ntech-c-postcode:\s*(?P.+)\ntech-c-city:\s*(?P.+)\ntech-c-country:\s*(?P.+)\n(?:tech-c-phone:\s*(?P.+)\n)?(?:tech-c-email:\s*(?P.+)\n)?", # Hetzner - "Admin Contact Information :[ ]*\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n\n", # GAL Communication - " Technical contact:\n (?P.+)\n (?P.*)\n (?P.+)\n (?P.+) (?P\S+),[ ]+(?P.+)\n (?P.+)\n (?P.+)\n (?P.*)\n (?P.*)", # .am - "Technical:\n\s*Name:\s*(?P.*)\n\s*Organisation:\s*(?P.*)\n\s*Language:.*\n\s*Phone:\s*(?P.*)\n\s*Fax:\s*(?P.*)\n\s*Email:\s*(?P.*)\n", # EURid - "\[Zone-C\]\nType: (?P.+)\nName: (?P.+)\n(Organisation: (?P.+)\n){0,1}(Address: (?P.+)\n){1}(Address: (?P.+)\n){0,1}(Address: (?P.+)\n){0,1}(Address: (?P.+)\n){0,1}PostalCode: (?P.+)\nCity: (?P.+)\nCountryCode: (?P[A-Za-z]{2})\nPhone: (?P.+)\nFax: (?P.+)\nEmail: (?P.+)\n(Remarks: (?P.+)\n){0,1}Changed: (?P.+)", # DeNIC - "Technical Contact:\n Name: (?P.+)\n City: (?P.+)\n State: (?P.+)\n Country: (?P.+)\n", # Akky (.com.mx) - "Tech Contact: (?P.+)\n(?P.+)\n(?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?))?)?)?)?)?)?\n(?P.+),? 
(?P[A-Z]{2,3})(?: [A-Z0-9]+)?\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 1 - "Tech Contact: (?P.+)\n(?P.+)\n(?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?))?)?)?)?)?)?\n(?P.+), (?P.+)\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 2 - "Tech Contact: (?P.+)\n(?P.+)\n(?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?))?)?)?)?)?)?, (?P.+)\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 3 - "Tech Contact: (?P.+)\n(?P.+) (?P[^\s]+)\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 4 - "Tech Contact: (?P.+)\n(?P.+)\n(?P.+)\n(?P.+) (?P[^\s]+)\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 5 - "Tech Contact: (?P.+)\n(?P.+)\n(?P.+)\n(?P.+)\n(?:(?P.+)\n)?(?P.+)\n(?P.+)\n(?P[A-Z]+)\n", # .co.th, format 6 - " Technical Contact:\n (?P.+) (?P.+)\n (?P.*)\n (?P.*)\n", # .com.tw (Western registrars) - "Technical Contact Information:\n\n(?:Given name: (?P.+)\n)?(?:Family name: (?P.+)\n)?(?:Company name: (?P.+)\n)?Address: (?P.+)\nCountry: (?P.+)\nPhone: (?P.*)\nFax: (?P.*)\nEmail: (?P.+)\n(?:Account Name: (?P.+)\n)?", # HKDNR (.hk) - "TECH ID:(?P.+)\nTECH Name:(?P.*)\n(?:TECH Organization:(?P.*)\n)?TECH Street1:(?P.+?)\n(?:TECH Street2:(?P.+?)\n(?:TECH Street3:(?P.+?)\n)?)?TECH City:(?P.+)\nTECH State:(?P.*)\nTECH Postal Code:(?P.+)\nTECH Country:(?P[A-Z]+)\nTECH Phone:(?P.*?)\nTECH Fax:(?P.*)\nTECH Email:(?P.+)\n", # Realtime Register - "Technical Contact\n NIC Handle \(if known\)\.+:(?P.*)\n \(I\)ndividual \(R\)ole\.+:(?P.*)\n Name \(Last, First\)\.+:(?P.*)\n Organization Name\.+:(?P.*)\n Street Address\.+:(?P.*)\n City\.+: (?P.*)\n State\.+: (?P.*)\n Postal Code\.+:(?P.*)\n Country\.+:(?P.*)\n Phone Number\.+:(?P.*)\n Fax Number\.+:(?P.*)\n E-Mailbox\.+:(?P.*)", # .ai + " Technical Contact:[ ]*\n (?P.*)\n (?P.*)\n (?P.*)\n (?P.*), (?P.*) (?P.*)\n (?P.*)\n(?: Phone: (?P.*)\n)? Email: (?P.*)\n", + # Corporate Domains, Inc. + "Technical Contact:\n (?P.+)\n (?P.+)\n(?: (?P.*)\n)?(?: (?P.*)\n)? 
(?P.+), (?P.+)\n (?P.+)\n (?P.+)\n (?P.+)\n\n", + # OVH + "(?:Tech ID:(?P.+)\n)?Tech Name:(?P.*)\n(:?Tech Organization:(?P.*)\n)?Tech Street1?:(?P.*)\n(?:Tech Street2:(?P.*)\n)?(?:Tech Street3:(?P.*)\n)?Tech City:(?P.*)\nTech State/Province:(?P.*)\nTech Postal Code:(?P.*)\nTech Country:(?P.*)\nTech Phone:(?P.*)\n(?:Tech Phone Ext.:(?P.*)\n)?(?:Tech FAX:(?P.*)\n)?(?:Tech FAX Ext.:(?P.*)\n)?Tech Email:(?P.*)", + # Public Interest Registry (.org), nic.pw, No-IP.com + "Tech(?:nical)? ID:(?P.+)\nTech(?:nical)? Name:(?P.*)\n(?:Tech(?:nical)? Organization:(?P.*)\n)?Tech(?:nical)? Address1?:(?P.*)\n(?:Tech(?:nical)? Address2:(?P.*)\n)?(?:Tech(?:nical)? Address3:(?P.*)\n)?Tech(?:nical)? City:(?P.*)\nTech(?:nical)? State/Province:(?P.*)\nTech(?:nical)? Country/Economy:(?P.*)\nTech(?:nical)? Postal Code:(?P.*)\nTech(?:nical)? Phone:(?P.*)\n(?:Tech(?:nical)? Phone Ext.:(?P.*)\n)?(?:Tech(?:nical)? FAX:(?P.*)\n)?(?:Tech(?:nical)? FAX Ext.:(?P.*)\n)?Tech(?:nical)? E-mail:(?P.*)", + # .ME, DotAsia + "Technical Contact ID:\s*(?P.+)\nTechnical Contact Name:\s*(?P.+)\nTechnical Contact Organization:\s*(?P.*)\nTechnical Contact Address1:\s*(?P.+)\nTechnical Contact Address2:\s*(?P.*)\nTechnical Contact City:\s*(?P.+)\nTechnical Contact State/Province:\s*(?P.+)\nTechnical Contact Postal Code:\s*(?P.+)\nTechnical Contact Country:\s*(?P.+)\nTechnical Contact Country Code:\s*(?P.+)\nTechnical Contact Phone Number:\s*(?P.+)\nTechnical Contact Email:\s*(?P.+)\n", + # .CO Internet + "Tech Contact: (?P.+)\nTech Organization: (?P.+)\nTech Name: (?P.+)\nTech Street: (?P.+)\nTech City: (?P.+)\nTech Postal Code: (?P.+)\nTech State: (?P.+)\nTech Country: (?P.+)\nTech Phone: (?P.*)\nTech Phone Ext: (?P.*)\nTech Fax: (?P.*)\nTech Fax Ext: (?P.*)\nTech Email: (?P.*)\n", + # Key-Systems GmbH + "(?:Tech ID:[ ]*(?P.*)\n)?Tech[ ]*Name:[ ]*(?P.*)\n(?:Tech[ ]*Organization:[ ]*(?P.*)\n)?Tech[ ]*Street:[ ]*(?P.+)\n(?:Tech[ ]*Street:[ ]*(?P.+)\n)?(?:Tech[ ]*Street:[ ]*(?P.+)\n)?Tech[ ]*City:[ ]*(?P.+)\nTech[ 
]*State(?:\/Province)?:[ ]*(?P.*)\nTech[ ]*Postal[ ]*Code:[ ]*(?P.+)\nTech[ ]*Country:[ ]*(?P.+)\n(?:Tech[ ]*Phone:[ ]*(?P.*)\n)?(?:Tech[ ]*Phone[ ]*Ext:[ ]*(?P.*)\n)?(?:Tech[ ]*Fax:[ ]*(?P.*)\n)?(?:Tech[ ]*Fax[ ]*Ext:\s*?(?P.*)\n)?(?:Tech[ ]*Email:[ ]*(?P.+)\n)?", + # WildWestDomains, GoDaddy, Namecheap/eNom, Ascio, Musedoma (.museum), EuroDNS, nic.ps + "Technical Contact\n(?: (?P.+)\n)? (?P.+)\n Email:(?P.+)\n (?P.+)\n(?: (?P.+)\n)? (?P.+) (?P.+)\n (?P.+)\n Tel: (?P.+)\n\n", + # internet.bs + " Technical Contact Details:[ ]*\n (?P.*)\n (?P.*)[ ]{2,}\((?P.*)\)\n (?P.*)\n(?: (?P.*)\n)?(?: (?P.*)\n)? (?P.*)\n (?P.*),(?P.*)\n (?P.*)\n Tel. (?P.*)", + # Whois.com + "tech-id:[ ]*(?P.*)\n(?:tech-organization:[ ]*(?P.*)\n)?tech-name:[ ]*(?P.*)\ntech-street:[ ]*(?P.*)\ntech-city:[ ]*(?P.*)\ntech-zip:[ ]*(?P.*)\ntech-country:[ ]*(?P.*)\n(?:tech-phone:[ ]*(?P.*)\n)?(?:tech-fax:[ ]*(?P.*)\n)?tech-email:[ ]*(?P.*)", + # InterNetworX + "Technical Contact:\n tech_org: (?P.*)\n tech_name: (?P.*)\n tech_email: (?P.*)\n tech_address: (?P
.*)\n tech_city: (?P.*)\n tech_state: (?P.*)\n tech_zip: (?P.*)\n tech_country: (?P.*)\n tech_phone: (?P.*)", + # Bellnames + "Technical contact:\n(?P[\S\s]+)\n(?P.+)\n(?P[A-Z0-9-]+)\s+(?P.+)\n(?P.+)\n\n", + # nic.ch + "Tech Contact ID:[ ]*(?P.+)\nTech Contact Name:[ ]*(?P.+)", # .au + "Technical Contact ID:(?P.*)\nTechnical Contact Name:(?P.*)\n(?:Technical Contact Organization:(?P.*)\n)?Technical Contact Address1:(?P.*)\n(?:Technical Contact Address2:(?P.*)\n)?(?:Technical Contact Address3:(?P.*)\n)?Technical Contact City:(?P.*)\n(?:Technical Contact State/Province:(?P.*)\n)?(?:Technical Contact Postal Code:(?P.*)\n)?Technical Contact Country:(?P.*)\nTechnical Contact Country Code:.*\nTechnical Contact Phone Number:(?P.*)\n(?:Technical Contact Facsimile Number:(?P.*)\n)?Technical Contact Email:(?P.*)", + # .US, .biz (NeuStar), .buzz, .moe (Interlink Co. Ltd.) + "Technical Contacts\n Name: (?P.+)\n(?: Organization: (?P.+)\n)? ContactID: (?P.+)\n(?: Address: (?P.+)\n(?: (?P.+)\n(?: (?P.+)\n)?)? (?P.+)\n (?P.+)\n (?P.+)\n (?P.+)\n)?(?: Created: (?P.+)\n)?(?: Last Update: (?P.+)\n)?", + # nic.it // NOTE: Why does this say 'Contacts'? Can it have multiple? + "Tech Name[.]* (?P.*)\n Tech Address[.]* (?P.*)\n Tech Address[.]* (?P.*)\n(?: Tech Address[.]* (?P.*)\n)? Tech Address[.]* (?P.*)\n Tech Address[.]* (?P.*)\n Tech Address[.]* (?P.*)\n Tech Address[.]* (?P.*)\n Tech Email[.]* (?P.*)\n Tech Phone[.]* (?P.*)\n Tech Fax[.]* (?P.*)", + # Melbourne IT + "Technical contact:\n(?: (?P.+)\n)? 
(?P.+)\n (?P.+)\n (?P.+)\n (?P.+), (?P.+) (?P.+) (?P.+)\n Phone: (?P.*)\n Fax: (?P.*)\n", + # Fabulous.com + "tech-c-name:\s*(?P.+)\n(?:tech-c-organization:\s*(?P.*)\n)?tech-c-type:\s*(?P.+)\ntech-c-address:\s*(?P.+)\ntech-c-postcode:\s*(?P.+)\ntech-c-city:\s*(?P.+)\ntech-c-country:\s*(?P.+)\n(?:tech-c-phone:\s*(?P.+)\n)?(?:tech-c-email:\s*(?P.+)\n)?", + # Hetzner + "Admin Contact Information :[ ]*\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n\n", + # GAL Communication + " Technical contact:\n (?P.+)\n (?P.*)\n (?P.+)\n (?P.+) (?P\S+),[ ]+(?P.+)\n (?P.+)\n (?P.+)\n (?P.*)\n (?P.*)", + # .am + "Technical:\n\s*Name:\s*(?P.*)\n\s*Organisation:\s*(?P.*)\n\s*Language:.*\n\s*Phone:\s*(?P.*)\n\s*Fax:\s*(?P.*)\n\s*Email:\s*(?P.*)\n", + # EURid + "\[Zone-C\]\nType: (?P.+)\nName: (?P.+)\n(Organisation: (?P.+)\n){0,1}(Address: (?P.+)\n){1}(Address: (?P.+)\n){0,1}(Address: (?P.+)\n){0,1}(Address: (?P.+)\n){0,1}PostalCode: (?P.+)\nCity: (?P.+)\nCountryCode: (?P[A-Za-z]{2})\nPhone: (?P.+)\nFax: (?P.+)\nEmail: (?P.+)\n(Remarks: (?P.+)\n){0,1}Changed: (?P.+)", + # DeNIC + "Technical Contact:\n Name: (?P.+)\n City: (?P.+)\n State: (?P.+)\n Country: (?P.+)\n", + # Akky (.com.mx) + "Tech Contact: (?P.+)\n(?P.+)\n(?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?))?)?)?)?)?)?\n(?P.+),? 
(?P[A-Z]{2,3})(?: [A-Z0-9]+)?\n(?P.+)\n(?P[A-Z]+)\n", + # .co.th, format 1 + "Tech Contact: (?P.+)\n(?P.+)\n(?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?))?)?)?)?)?)?\n(?P.+), (?P.+)\n(?P.+)\n(?P[A-Z]+)\n", + # .co.th, format 2 + "Tech Contact: (?P.+)\n(?P.+)\n(?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?)(?:,+ (?P.+?))?)?)?)?)?)?, (?P.+)\n(?P.+)\n(?P[A-Z]+)\n", + # .co.th, format 3 + "Tech Contact: (?P.+)\n(?P.+) (?P[^\s]+)\n(?P.+)\n(?P[A-Z]+)\n", + # .co.th, format 4 + "Tech Contact: (?P.+)\n(?P.+)\n(?P.+)\n(?P.+) (?P[^\s]+)\n(?P.+)\n(?P[A-Z]+)\n", + # .co.th, format 5 + "Tech Contact: (?P.+)\n(?P.+)\n(?P.+)\n(?P.+)\n(?:(?P.+)\n)?(?P.+)\n(?P.+)\n(?P[A-Z]+)\n", + # .co.th, format 6 + " Technical Contact:\n (?P.+) (?P.+)\n (?P.*)\n (?P.*)\n", + # .com.tw (Western registrars) + "Technical Contact Information:\n\n(?:Given name: (?P.+)\n)?(?:Family name: (?P.+)\n)?(?:Company name: (?P.+)\n)?Address: (?P.+)\nCountry: (?P.+)\nPhone: (?P.*)\nFax: (?P.*)\nEmail: (?P.+)\n(?:Account Name: (?P.+)\n)?", + # HKDNR (.hk) + "TECH ID:(?P.+)\nTECH Name:(?P.*)\n(?:TECH Organization:(?P.*)\n)?TECH Street1:(?P.+?)\n(?:TECH Street2:(?P.+?)\n(?:TECH Street3:(?P.+?)\n)?)?TECH City:(?P.+)\nTECH State:(?P.*)\nTECH Postal Code:(?P.+)\nTECH Country:(?P[A-Z]+)\nTECH Phone:(?P.*?)\nTECH Fax:(?P.*)\nTECH Email:(?P.+)\n", + # Realtime Register + "Technical Contact\n NIC Handle \(if known\)\.+:(?P.*)\n \(I\)ndividual \(R\)ole\.+:(?P.*)\n Name \(Last, First\)\.+:(?P.*)\n Organization Name\.+:(?P.*)\n Street Address\.+:(?P.*)\n City\.+: (?P.*)\n State\.+: (?P.*)\n Postal Code\.+:(?P.*)\n Country\.+:(?P.*)\n Phone Number\.+:(?P.*)\n Fax Number\.+:(?P.*)\n E-Mailbox\.+:(?P.*)", + # .ai ] admin_contact_regexes = [ - " Administrative Contact:[ ]*\n (?P.*)\n (?P.*)\n (?P.*)\n (?P.*), (?P.*) (?P.*)\n (?P.*)\n(?: Phone: (?P.*)\n)? Email: (?P.*)\n", # Corporate Domains, Inc. 
- "Administrative Contact:\n (?P.+)\n (?P.+)\n(?: (?P.*)\n)?(?: (?P.*)\n)? (?P.+), (?P.+)\n (?P.+)\n (?P.+)\n (?P.+)\n\n", # OVH - "(?:Admin ID:(?P.+)\n)?Admin Name:(?P.*)\n(?:Admin Organization:(?P.*)\n)?Admin Street1?:(?P.*)\n(?:Admin Street2:(?P.*)\n)?(?:Admin Street3:(?P.*)\n)?Admin City:(?P.*)\nAdmin State/Province:(?P.*)\nAdmin Postal Code:(?P.*)\nAdmin Country:(?P.*)\nAdmin Phone:(?P.*)\n(?:Admin Phone Ext.:(?P.*)\n)?(?:Admin FAX:(?P.*)\n)?(?:Admin FAX Ext.:(?P.*)\n)?Admin Email:(?P.*)", # Public Interest Registry (.org), nic.pw, No-IP.com - "Admin(?:istrative)? ID:(?P.+)\nAdmin(?:istrative)? Name:(?P.*)\n(?:Admin(?:istrative)? Organization:(?P.*)\n)?Admin(?:istrative)? Address1?:(?P.*)\n(?:Admin(?:istrative)? Address2:(?P.*)\n)?(?:Admin(?:istrative)? Address3:(?P.*)\n)?Admin(?:istrative)? City:(?P.*)\nAdmin(?:istrative)? State/Province:(?P.*)\nAdmin(?:istrative)? Country/Economy:(?P.*)\nAdmin(?:istrative)? Postal Code:(?P.*)\nAdmin(?:istrative)? Phone:(?P.*)\n(?:Admin(?:istrative)? Phone Ext.:(?P.*)\n)?(?:Admin(?:istrative)? FAX:(?P.*)\n)?(?:Admin(?:istrative)? FAX Ext.:(?P.*)\n)?Admin(?:istrative)? 
E-mail:(?P.*)", # .ME, DotAsia - "Administrative Contact ID:\s*(?P.+)\nAdministrative Contact Name:\s*(?P.+)\nAdministrative Contact Organization:\s*(?P.*)\nAdministrative Contact Address1:\s*(?P.+)\nAdministrative Contact Address2:\s*(?P.*)\nAdministrative Contact City:\s*(?P.+)\nAdministrative Contact State/Province:\s*(?P.+)\nAdministrative Contact Postal Code:\s*(?P.+)\nAdministrative Contact Country:\s*(?P.+)\nAdministrative Contact Country Code:\s*(?P.+)\nAdministrative Contact Phone Number:\s*(?P.+)\nAdministrative Contact Email:\s*(?P.+)\n", # .CO Internet - "Admin Contact: (?P.+)\nAdmin Organization: (?P.+)\nAdmin Name: (?P.+)\nAdmin Street: (?P.+)\nAdmin City: (?P.+)\nAdmin State: (?P.+)\nAdmin Postal Code: (?P.+)\nAdmin Country: (?P.+)\nAdmin Phone: (?P.*)\nAdmin Phone Ext: (?P.*)\nAdmin Fax: (?P.*)\nAdmin Fax Ext: (?P.*)\nAdmin Email: (?P.*)\n", # Key-Systems GmbH - "(?:Admin ID:[ ]*(?P.*)\n)?Admin[ ]*Name:[ ]*(?P.*)\n(?:Admin[ ]*Organization:[ ]*(?P.*)\n)?Admin[ ]*Street:[ ]*(?P.+)\n(?:Admin[ ]*Street:[ ]*(?P.+)\n)?(?:Admin[ ]*Street:[ ]*(?P.+)\n)?Admin[ ]*City:[ ]*(?P.+)\nAdmin[ ]*State(?:\/Province)?:[ ]*(?P.*)\nAdmin[ ]*Postal[ ]*Code:[ ]*(?P.+)\nAdmin[ ]*Country:[ ]*(?P.+)\n(?:Admin[ ]*Phone:[ ]*(?P.*)\n)?(?:Admin[ ]*Phone[ ]*Ext:[ ]*(?P.*)\n)?(?:Admin[ ]*Fax:[ ]*(?P.*)\n)?(?:Admin[ ]*Fax[ ]*Ext:\s*?(?P.*)\n)?(?:Admin[ ]*Email:[ ]*(?P.+)\n)?", # WildWestDomains, GoDaddy, Namecheap/eNom, Ascio, Musedoma (.museum), EuroDNS, nic.ps - "Administrative Contact\n(?: (?P.+)\n)? (?P.+)\n Email:(?P.+)\n (?P.+)\n(?: (?P.+)\n)? (?P.+) (?P.+)\n (?P.+)\n Tel: (?P.+)\n\n", # internet.bs - " Administrative Contact Details:[ ]*\n (?P.*)\n (?P.*)[ ]{2,}\((?P.*)\)\n (?P.*)\n(?: (?P.*)\n)?(?: (?P.*)\n)? (?P.*)\n (?P.*),(?P.*)\n (?P.*)\n Tel. 
(?P.*)", # Whois.com - "admin-id:[ ]*(?P.*)\n(?:admin-organization:[ ]*(?P.*)\n)?admin-name:[ ]*(?P.*)\nadmin-street:[ ]*(?P.*)\nadmin-city:[ ]*(?P.*)\nadmin-zip:[ ]*(?P.*)\nadmin-country:[ ]*(?P.*)\n(?:admin-phone:[ ]*(?P.*)\n)?(?:admin-fax:[ ]*(?P.*)\n)?admin-email:[ ]*(?P.*)", # InterNetworX - "Administrative Contact:\n admin_org: (?P.*)\n admin_name: (?P.*)\n admin_email: (?P.*)\n admin_address: (?P
.*)\n admin_city: (?P.*)\n admin_state: (?P.*)\n admin_zip: (?P.*)\n admin_country: (?P.*)\n admin_phone: (?P.*)", # Bellnames - "Administrative Contact ID:(?P.*)\nAdministrative Contact Name:(?P.*)\n(?:Administrative Contact Organization:(?P.*)\n)?Administrative Contact Address1:(?P.*)\n(?:Administrative Contact Address2:(?P.*)\n)?(?:Administrative Contact Address3:(?P.*)\n)?Administrative Contact City:(?P.*)\n(?:Administrative Contact State/Province:(?P.*)\n)?(?:Administrative Contact Postal Code:(?P.*)\n)?Administrative Contact Country:(?P.*)\nAdministrative Contact Country Code:.*\nAdministrative Contact Phone Number:(?P.*)\n(?:Administrative Contact Facsimile Number:(?P.*)\n)?Administrative Contact Email:(?P.*)", # .US, .biz (NeuStar), .buzz, .moe (Interlink Co. Ltd.) - "Admin Contact\n Name: (?P.+)\n(?: Organization: (?P.+)\n)? ContactID: (?P.+)\n(?: Address: (?P.+)\n(?: (?P.+)\n(?: (?P.+)\n)?)? (?P.+)\n (?P.+)\n (?P.+)\n (?P.+)\n)?(?: Created: (?P.+)\n)?(?: Last Update: (?P.+)\n)?", # nic.it - "Admin Name[.]* (?P.*)\n Admin Address[.]* (?P.*)\n Admin Address[.]* (?P.*)\n(?: Admin Address[.]* (?P.*)\n)? Admin Address[.]* (?P.*)\n Admin Address[.]* (?P.*)\n Admin Address[.]* (?P.*)\n Admin Address[.]* (?P.*)\n Admin Email[.]* (?P.*)\n Admin Phone[.]* (?P.*)\n Admin Fax[.]* (?P.*)", # Melbourne IT - "Administrative contact:\n(?: (?P.+)\n)? 
(?P.+)\n (?P.+)\n (?P.+)\n (?P.+), (?P.+) (?P.+) (?P.+)\n Phone: (?P.*)\n Fax: (?P.*)\n", # Fabulous.com - "admin-c-name:\s*(?P.+)\n(?:admin-c-organization:\s*(?P.*)\n)?admin-c-type:\s*(?P.+)\nadmin-c-address:\s*(?P.+)\nadmin-c-postcode:\s*(?P.+)\nadmin-c-city:\s*(?P.+)\nadmin-c-country:\s*(?P.+)\n(?:admin-c-phone:\s*(?P.+)\n)?(?:admin-c-email:\s*(?P.+)\n)?", # Hetzner - "Tech Contact Information :[ ]*\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n\n", # GAL Communication - " Administrative contact:\n (?P.+)\n (?P.*)\n (?P.+)\n (?P.+) (?P\S+),[ ]+(?P.+)\n (?P.+)\n (?P.+)\n (?P.*)\n (?P.*)", # .am - "Administrative Contact:\n Name: (?P.+)\n City: (?P.+)\n State: (?P.+)\n Country: (?P.+)\n", # Akky (.com.mx) - "\[Tech-C\]\nType: (?P.+)\nName: (?P.+)\n(Organisation: (?P.+)\n){0,1}(Address: (?P.+)\n){1}(Address: (?P.+)\n){0,1}(Address: (?P.+)\n){0,1}(Address: (?P.+)\n){0,1}PostalCode: (?P.+)\nCity: (?P.+)\nCountryCode: (?P[A-Za-z]{2})\nPhone: (?P.+)\nFax: (?P.+)\nEmail: (?P.+)\n(Remarks: (?P.+)\n){0,1}Changed: (?P.+)", # DeNIC - " Administrative Contact:\n (?P.+) (?P.+)\n (?P.*)\n (?P.*)\n", # .com.tw (Western registrars) - "Administrative Contact Information:\n\n(?:Given name: (?P.+)\n)?(?:Family name: (?P.+)\n)?(?:Company name: (?P.+)\n)?Address: (?P.+)\nCountry: (?P.+)\nPhone: (?P.*)\nFax: (?P.*)\nEmail: (?P.+)\n(?:Account Name: (?P.+)\n)?", # HKDNR (.hk) - "ADMIN ID:(?P.+)\nADMIN Name:(?P.*)\n(?:ADMIN Organization:(?P.*)\n)?ADMIN Street1:(?P.+?)\n(?:ADMIN Street2:(?P.+?)\n(?:ADMIN Street3:(?P.+?)\n)?)?ADMIN City:(?P.+)\nADMIN State:(?P.*)\nADMIN Postal Code:(?P.+)\nADMIN Country:(?P[A-Z]+)\nADMIN Phone:(?P.*?)\nADMIN Fax:(?P.*)\nADMIN Email:(?P.+)\n", # Realtime Register - "Administrative Contact\n NIC Handle \(if known\)\.+:(?P.*)\n \(I\)ndividual \(R\)ole\.+:(?P.*)\n Name \(Last, First\)\.+:(?P.*)\n Organization Name\.+:(?P.*)\n Street Address\.+:(?P.*)\n City\.+: (?P.*)\n State\.+: (?P.*)\n Postal 
Code\.+:(?P.*)\n Country\.+:(?P.*)\n Phone Number\.+:(?P.*)\n Fax Number\.+:(?P.*)\n E-Mailbox\.+:(?P.*)", # .ai + " Administrative Contact:[ ]*\n (?P.*)\n (?P.*)\n (?P.*)\n (?P.*), (?P.*) (?P.*)\n (?P.*)\n(?: Phone: (?P.*)\n)? Email: (?P.*)\n", + # Corporate Domains, Inc. + "Administrative Contact:\n (?P.+)\n (?P.+)\n(?: (?P.*)\n)?(?: (?P.*)\n)? (?P.+), (?P.+)\n (?P.+)\n (?P.+)\n (?P.+)\n\n", + # OVH + "(?:Admin ID:(?P.+)\n)?Admin Name:(?P.*)\n(?:Admin Organization:(?P.*)\n)?Admin Street1?:(?P.*)\n(?:Admin Street2:(?P.*)\n)?(?:Admin Street3:(?P.*)\n)?Admin City:(?P.*)\nAdmin State/Province:(?P.*)\nAdmin Postal Code:(?P.*)\nAdmin Country:(?P.*)\nAdmin Phone:(?P.*)\n(?:Admin Phone Ext.:(?P.*)\n)?(?:Admin FAX:(?P.*)\n)?(?:Admin FAX Ext.:(?P.*)\n)?Admin Email:(?P.*)", + # Public Interest Registry (.org), nic.pw, No-IP.com + "Admin(?:istrative)? ID:(?P.+)\nAdmin(?:istrative)? Name:(?P.*)\n(?:Admin(?:istrative)? Organization:(?P.*)\n)?Admin(?:istrative)? Address1?:(?P.*)\n(?:Admin(?:istrative)? Address2:(?P.*)\n)?(?:Admin(?:istrative)? Address3:(?P.*)\n)?Admin(?:istrative)? City:(?P.*)\nAdmin(?:istrative)? State/Province:(?P.*)\nAdmin(?:istrative)? Country/Economy:(?P.*)\nAdmin(?:istrative)? Postal Code:(?P.*)\nAdmin(?:istrative)? Phone:(?P.*)\n(?:Admin(?:istrative)? Phone Ext.:(?P.*)\n)?(?:Admin(?:istrative)? FAX:(?P.*)\n)?(?:Admin(?:istrative)? FAX Ext.:(?P.*)\n)?Admin(?:istrative)? 
E-mail:(?P.*)", + # .ME, DotAsia + "Administrative Contact ID:\s*(?P.+)\nAdministrative Contact Name:\s*(?P.+)\nAdministrative Contact Organization:\s*(?P.*)\nAdministrative Contact Address1:\s*(?P.+)\nAdministrative Contact Address2:\s*(?P.*)\nAdministrative Contact City:\s*(?P.+)\nAdministrative Contact State/Province:\s*(?P.+)\nAdministrative Contact Postal Code:\s*(?P.+)\nAdministrative Contact Country:\s*(?P.+)\nAdministrative Contact Country Code:\s*(?P.+)\nAdministrative Contact Phone Number:\s*(?P.+)\nAdministrative Contact Email:\s*(?P.+)\n", + # .CO Internet + "Admin Contact: (?P.+)\nAdmin Organization: (?P.+)\nAdmin Name: (?P.+)\nAdmin Street: (?P.+)\nAdmin City: (?P.+)\nAdmin State: (?P.+)\nAdmin Postal Code: (?P.+)\nAdmin Country: (?P.+)\nAdmin Phone: (?P.*)\nAdmin Phone Ext: (?P.*)\nAdmin Fax: (?P.*)\nAdmin Fax Ext: (?P.*)\nAdmin Email: (?P.*)\n", + # Key-Systems GmbH + "(?:Admin ID:[ ]*(?P.*)\n)?Admin[ ]*Name:[ ]*(?P.*)\n(?:Admin[ ]*Organization:[ ]*(?P.*)\n)?Admin[ ]*Street:[ ]*(?P.+)\n(?:Admin[ ]*Street:[ ]*(?P.+)\n)?(?:Admin[ ]*Street:[ ]*(?P.+)\n)?Admin[ ]*City:[ ]*(?P.+)\nAdmin[ ]*State(?:\/Province)?:[ ]*(?P.*)\nAdmin[ ]*Postal[ ]*Code:[ ]*(?P.+)\nAdmin[ ]*Country:[ ]*(?P.+)\n(?:Admin[ ]*Phone:[ ]*(?P.*)\n)?(?:Admin[ ]*Phone[ ]*Ext:[ ]*(?P.*)\n)?(?:Admin[ ]*Fax:[ ]*(?P.*)\n)?(?:Admin[ ]*Fax[ ]*Ext:\s*?(?P.*)\n)?(?:Admin[ ]*Email:[ ]*(?P.+)\n)?", + # WildWestDomains, GoDaddy, Namecheap/eNom, Ascio, Musedoma (.museum), EuroDNS, nic.ps + "Administrative Contact\n(?: (?P.+)\n)? (?P.+)\n Email:(?P.+)\n (?P.+)\n(?: (?P.+)\n)? (?P.+) (?P.+)\n (?P.+)\n Tel: (?P.+)\n\n", + # internet.bs + " Administrative Contact Details:[ ]*\n (?P.*)\n (?P.*)[ ]{2,}\((?P.*)\)\n (?P.*)\n(?: (?P.*)\n)?(?: (?P.*)\n)? (?P.*)\n (?P.*),(?P.*)\n (?P.*)\n Tel. 
(?P.*)", + # Whois.com + "admin-id:[ ]*(?P.*)\n(?:admin-organization:[ ]*(?P.*)\n)?admin-name:[ ]*(?P.*)\nadmin-street:[ ]*(?P.*)\nadmin-city:[ ]*(?P.*)\nadmin-zip:[ ]*(?P.*)\nadmin-country:[ ]*(?P.*)\n(?:admin-phone:[ ]*(?P.*)\n)?(?:admin-fax:[ ]*(?P.*)\n)?admin-email:[ ]*(?P.*)", + # InterNetworX + "Administrative Contact:\n admin_org: (?P.*)\n admin_name: (?P.*)\n admin_email: (?P.*)\n admin_address: (?P
.*)\n admin_city: (?P.*)\n admin_state: (?P.*)\n admin_zip: (?P.*)\n admin_country: (?P.*)\n admin_phone: (?P.*)", + # Bellnames + "Administrative Contact ID:(?P.*)\nAdministrative Contact Name:(?P.*)\n(?:Administrative Contact Organization:(?P.*)\n)?Administrative Contact Address1:(?P.*)\n(?:Administrative Contact Address2:(?P.*)\n)?(?:Administrative Contact Address3:(?P.*)\n)?Administrative Contact City:(?P.*)\n(?:Administrative Contact State/Province:(?P.*)\n)?(?:Administrative Contact Postal Code:(?P.*)\n)?Administrative Contact Country:(?P.*)\nAdministrative Contact Country Code:.*\nAdministrative Contact Phone Number:(?P.*)\n(?:Administrative Contact Facsimile Number:(?P.*)\n)?Administrative Contact Email:(?P.*)", + # .US, .biz (NeuStar), .buzz, .moe (Interlink Co. Ltd.) + "Admin Contact\n Name: (?P.+)\n(?: Organization: (?P.+)\n)? ContactID: (?P.+)\n(?: Address: (?P.+)\n(?: (?P.+)\n(?: (?P.+)\n)?)? (?P.+)\n (?P.+)\n (?P.+)\n (?P.+)\n)?(?: Created: (?P.+)\n)?(?: Last Update: (?P.+)\n)?", + # nic.it + "Admin Name[.]* (?P.*)\n Admin Address[.]* (?P.*)\n Admin Address[.]* (?P.*)\n(?: Admin Address[.]* (?P.*)\n)? Admin Address[.]* (?P.*)\n Admin Address[.]* (?P.*)\n Admin Address[.]* (?P.*)\n Admin Address[.]* (?P.*)\n Admin Email[.]* (?P.*)\n Admin Phone[.]* (?P.*)\n Admin Fax[.]* (?P.*)", + # Melbourne IT + "Administrative contact:\n(?: (?P.+)\n)? 
(?P.+)\n (?P.+)\n (?P.+)\n (?P.+), (?P.+) (?P.+) (?P.+)\n Phone: (?P.*)\n Fax: (?P.*)\n", + # Fabulous.com + "admin-c-name:\s*(?P.+)\n(?:admin-c-organization:\s*(?P.*)\n)?admin-c-type:\s*(?P.+)\nadmin-c-address:\s*(?P.+)\nadmin-c-postcode:\s*(?P.+)\nadmin-c-city:\s*(?P.+)\nadmin-c-country:\s*(?P.+)\n(?:admin-c-phone:\s*(?P.+)\n)?(?:admin-c-email:\s*(?P.+)\n)?", + # Hetzner + "Tech Contact Information :[ ]*\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n\n", + # GAL Communication + " Administrative contact:\n (?P.+)\n (?P.*)\n (?P.+)\n (?P.+) (?P\S+),[ ]+(?P.+)\n (?P.+)\n (?P.+)\n (?P.*)\n (?P.*)", + # .am + "Administrative Contact:\n Name: (?P.+)\n City: (?P.+)\n State: (?P.+)\n Country: (?P.+)\n", + # Akky (.com.mx) + "\[Tech-C\]\nType: (?P.+)\nName: (?P.+)\n(Organisation: (?P.+)\n){0,1}(Address: (?P.+)\n){1}(Address: (?P.+)\n){0,1}(Address: (?P.+)\n){0,1}(Address: (?P.+)\n){0,1}PostalCode: (?P.+)\nCity: (?P.+)\nCountryCode: (?P[A-Za-z]{2})\nPhone: (?P.+)\nFax: (?P.+)\nEmail: (?P.+)\n(Remarks: (?P.+)\n){0,1}Changed: (?P.+)", + # DeNIC + " Administrative Contact:\n (?P.+) (?P.+)\n (?P.*)\n (?P.*)\n", + # .com.tw (Western registrars) + "Administrative Contact Information:\n\n(?:Given name: (?P.+)\n)?(?:Family name: (?P.+)\n)?(?:Company name: (?P.+)\n)?Address: (?P.+)\nCountry: (?P.+)\nPhone: (?P.*)\nFax: (?P.*)\nEmail: (?P.+)\n(?:Account Name: (?P.+)\n)?", + # HKDNR (.hk) + "ADMIN ID:(?P.+)\nADMIN Name:(?P.*)\n(?:ADMIN Organization:(?P.*)\n)?ADMIN Street1:(?P.+?)\n(?:ADMIN Street2:(?P.+?)\n(?:ADMIN Street3:(?P.+?)\n)?)?ADMIN City:(?P.+)\nADMIN State:(?P.*)\nADMIN Postal Code:(?P.+)\nADMIN Country:(?P[A-Z]+)\nADMIN Phone:(?P.*?)\nADMIN Fax:(?P.*)\nADMIN Email:(?P.+)\n", + # Realtime Register + "Administrative Contact\n NIC Handle \(if known\)\.+:(?P.*)\n \(I\)ndividual \(R\)ole\.+:(?P.*)\n Name \(Last, First\)\.+:(?P.*)\n Organization Name\.+:(?P.*)\n Street Address\.+:(?P.*)\n City\.+: (?P.*)\n State\.+: 
(?P.*)\n Postal Code\.+:(?P.*)\n Country\.+:(?P.*)\n Phone Number\.+:(?P.*)\n Fax Number\.+:(?P.*)\n E-Mailbox\.+:(?P.*)", + # .ai ] billing_contact_regexes = [ - "(?:Billing ID:(?P.+)\n)?Billing Name:(?P.*)\nBilling Organization:(?P.*)\nBilling Street1:(?P.*)\n(?:Billing Street2:(?P.*)\n)?(?:Billing Street3:(?P.*)\n)?Billing City:(?P.*)\nBilling State/Province:(?P.*)\nBilling Postal Code:(?P.*)\nBilling Country:(?P.*)\nBilling Phone:(?P.*)\n(?:Billing Phone Ext.:(?P.*)\n)?(?:Billing FAX:(?P.*)\n)?(?:Billing FAX Ext.:(?P.*)\n)?Billing Email:(?P.*)", # nic.pw, No-IP.com - "Billing ID:(?P.+)\nBilling Name:(?P.*)\n(?:Billing Organization:(?P.*)\n)?Billing Address1?:(?P.*)\n(?:Billing Address2:(?P.*)\n)?(?:Billing Address3:(?P.*)\n)?Billing City:(?P.*)\nBilling State/Province:(?P.*)\nBilling Country/Economy:(?P.*)\nBilling Postal Code:(?P.*)\nBilling Phone:(?P.*)\n(?:Billing Phone Ext.:(?P.*)\n)?(?:Billing FAX:(?P.*)\n)?(?:Billing FAX Ext.:(?P.*)\n)?Billing E-mail:(?P.*)", # DotAsia - "Billing Contact ID:\s*(?P.+)\nBilling Contact Name:\s*(?P.+)\nBilling Contact Organization:\s*(?P.*)\nBilling Contact Address1:\s*(?P.+)\nBilling Contact Address2:\s*(?P.*)\nBilling Contact City:\s*(?P.+)\nBilling Contact State/Province:\s*(?P.+)\nBilling Contact Postal Code:\s*(?P.+)\nBilling Contact Country:\s*(?P.+)\nBilling Contact Country Code:\s*(?P.+)\nBilling Contact Phone Number:\s*(?P.+)\nBilling Contact Email:\s*(?P.+)\n", # .CO Internet - "Billing Contact: (?P.+)\nBilling Organization: (?P.+)\nBilling Name: (?P.+)\nBilling Street: (?P.+)\nBilling City: (?P.+)\nBilling Postal Code: (?P.+)\nBilling State: (?P.+)\nBilling Country: (?P.+)\nBilling Phone: (?P.*)\nBilling Phone Ext: (?P.*)\nBilling Fax: (?P.*)\nBilling Fax Ext: (?P.*)\nBilling Email: (?P.*)\n", # Key-Systems GmbH - "(?:Billing ID:[ ]*(?P.*)\n)?Billing[ ]*Name:[ ]*(?P.*)\n(?:Billing[ ]*Organization:[ ]*(?P.*)\n)?Billing[ ]*Street:[ ]*(?P.+)\n(?:Billing[ ]*Street:[ ]*(?P.+)\n)?Billing[ ]*City:[ ]*(?P.+)\nBilling[ 
]*State\/Province:[ ]*(?P.+)\nBilling[ ]*Postal[ ]*Code:[ ]*(?P.+)\nBilling[ ]*Country:[ ]*(?P.+)\n(?:Billing[ ]*Phone:[ ]*(?P.*)\n)?(?:Billing[ ]*Phone[ ]*Ext:[ ]*(?P.*)\n)?(?:Billing[ ]*Fax:[ ]*(?P.*)\n)?(?:Billing[ ]*Fax[ ]*Ext:\s*?(?P.*)\n)?(?:Billing[ ]*Email:[ ]*(?P.+)\n)?", # Musedoma (.museum) - "Billing Contact:\n (?P.+)\n (?P.+)\n(?: (?P.*)\n)?(?: (?P.*)\n)? (?P.+), (?P.+)\n (?P.+)\n (?P.+)\n (?P.+)\n\n", # OVH - " Billing Contact Details:[ ]*\n (?P.*)\n (?P.*)[ ]{2,}\((?P.*)\)\n (?P.*)\n(?: (?P.*)\n)?(?: (?P.*)\n)? (?P.*)\n (?P.*),(?P.*)\n (?P.*)\n Tel. (?P.*)", # Whois.com - "billing-id:[ ]*(?P.*)\n(?:billing-organization:[ ]*(?P.*)\n)?billing-name:[ ]*(?P.*)\nbilling-street:[ ]*(?P.*)\nbilling-city:[ ]*(?P.*)\nbilling-zip:[ ]*(?P.*)\nbilling-country:[ ]*(?P.*)\n(?:billing-phone:[ ]*(?P.*)\n)?(?:billing-fax:[ ]*(?P.*)\n)?billing-email:[ ]*(?P.*)", # InterNetworX - "Billing Contact:\n bill_org: (?P.*)\n bill_name: (?P.*)\n bill_email: (?P.*)\n bill_address: (?P
.*)\n bill_city: (?P.*)\n bill_state: (?P.*)\n bill_zip: (?P.*)\n bill_country: (?P.*)\n bill_phone: (?P.*)", # Bellnames - "Billing Contact ID:(?P.*)\nBilling Contact Name:(?P.*)\n(?:Billing Contact Organization:(?P.*)\n)?Billing Contact Address1:(?P.*)\n(?:Billing Contact Address2:(?P.*)\n)?(?:Billing Contact Address3:(?P.*)\n)?Billing Contact City:(?P.*)\n(?:Billing Contact State/Province:(?P.*)\n)?(?:Billing Contact Postal Code:(?P.*)\n)?Billing Contact Country:(?P.*)\nBilling Contact Country Code:.*\nBilling Contact Phone Number:(?P.*)\n(?:Billing Contact Facsimile Number:(?P.*)\n)?Billing Contact Email:(?P.*)", # .US, .biz (NeuStar), .buzz, .moe (Interlink Co. Ltd.) - "Billing contact:\n(?: (?P.+)\n)? (?P.+)\n (?P.+)\n (?P.+)\n (?P.+), (?P.+) (?P.+) (?P.+)\n Phone: (?P.*)\n Fax: (?P.*)\n", # Fabulous.com - "Billing Contact Information :[ ]*\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n\n", # GAL Communication - "Billing Contact:\n Name: (?P.+)\n City: (?P.+)\n State: (?P.+)\n Country: (?P.+)\n", # Akky (.com.mx) - "BILLING ID:(?P.+)\nBILLING Name:(?P.*)\n(?:BILLING Organization:(?P.*)\n)?BILLING Street1:(?P.+?)\n(?:BILLING Street2:(?P.+?)\n(?:BILLING Street3:(?P.+?)\n)?)?BILLING City:(?P.+)\nBILLING State:(?P.*)\nBILLING Postal Code:(?P.+)\nBILLING Country:(?P[A-Z]+)\nBILLING Phone:(?P.*?)\nBILLING Fax:(?P.*)\nBILLING Email:(?P.+)\n", # Realtime Register - "Billing Contact\n NIC Handle \(if known\)\.+:(?P.*)\n \(I\)ndividual \(R\)ole\.+:(?P.*)\n Name \(Last, First\)\.+:(?P.*)\n Organization Name\.+:(?P.*)\n Street Address\.+:(?P.*)\n City\.+: (?P.*)\n State\.+: (?P.*)\n Postal Code\.+:(?P.*)\n Country\.+:(?P.*)\n Phone Number\.+:(?P.*)\n Fax Number\.+:(?P.*)\n E-Mailbox\.+:(?P.*)", # .ai + "(?:Billing ID:(?P.+)\n)?Billing Name:(?P.*)\nBilling Organization:(?P.*)\nBilling Street1:(?P.*)\n(?:Billing Street2:(?P.*)\n)?(?:Billing Street3:(?P.*)\n)?Billing City:(?P.*)\nBilling 
State/Province:(?P.*)\nBilling Postal Code:(?P.*)\nBilling Country:(?P.*)\nBilling Phone:(?P.*)\n(?:Billing Phone Ext.:(?P.*)\n)?(?:Billing FAX:(?P.*)\n)?(?:Billing FAX Ext.:(?P.*)\n)?Billing Email:(?P.*)", + # nic.pw, No-IP.com + "Billing ID:(?P.+)\nBilling Name:(?P.*)\n(?:Billing Organization:(?P.*)\n)?Billing Address1?:(?P.*)\n(?:Billing Address2:(?P.*)\n)?(?:Billing Address3:(?P.*)\n)?Billing City:(?P.*)\nBilling State/Province:(?P.*)\nBilling Country/Economy:(?P.*)\nBilling Postal Code:(?P.*)\nBilling Phone:(?P.*)\n(?:Billing Phone Ext.:(?P.*)\n)?(?:Billing FAX:(?P.*)\n)?(?:Billing FAX Ext.:(?P.*)\n)?Billing E-mail:(?P.*)", + # DotAsia + "Billing Contact ID:\s*(?P.+)\nBilling Contact Name:\s*(?P.+)\nBilling Contact Organization:\s*(?P.*)\nBilling Contact Address1:\s*(?P.+)\nBilling Contact Address2:\s*(?P.*)\nBilling Contact City:\s*(?P.+)\nBilling Contact State/Province:\s*(?P.+)\nBilling Contact Postal Code:\s*(?P.+)\nBilling Contact Country:\s*(?P.+)\nBilling Contact Country Code:\s*(?P.+)\nBilling Contact Phone Number:\s*(?P.+)\nBilling Contact Email:\s*(?P.+)\n", + # .CO Internet + "Billing Contact: (?P.+)\nBilling Organization: (?P.+)\nBilling Name: (?P.+)\nBilling Street: (?P.+)\nBilling City: (?P.+)\nBilling Postal Code: (?P.+)\nBilling State: (?P.+)\nBilling Country: (?P.+)\nBilling Phone: (?P.*)\nBilling Phone Ext: (?P.*)\nBilling Fax: (?P.*)\nBilling Fax Ext: (?P.*)\nBilling Email: (?P.*)\n", + # Key-Systems GmbH + "(?:Billing ID:[ ]*(?P.*)\n)?Billing[ ]*Name:[ ]*(?P.*)\n(?:Billing[ ]*Organization:[ ]*(?P.*)\n)?Billing[ ]*Street:[ ]*(?P.+)\n(?:Billing[ ]*Street:[ ]*(?P.+)\n)?Billing[ ]*City:[ ]*(?P.+)\nBilling[ ]*State\/Province:[ ]*(?P.+)\nBilling[ ]*Postal[ ]*Code:[ ]*(?P.+)\nBilling[ ]*Country:[ ]*(?P.+)\n(?:Billing[ ]*Phone:[ ]*(?P.*)\n)?(?:Billing[ ]*Phone[ ]*Ext:[ ]*(?P.*)\n)?(?:Billing[ ]*Fax:[ ]*(?P.*)\n)?(?:Billing[ ]*Fax[ ]*Ext:\s*?(?P.*)\n)?(?:Billing[ ]*Email:[ ]*(?P.+)\n)?", + # Musedoma (.museum) + "Billing Contact:\n (?P.+)\n 
(?P.+)\n(?: (?P.*)\n)?(?: (?P.*)\n)? (?P.+), (?P.+)\n (?P.+)\n (?P.+)\n (?P.+)\n\n", + # OVH + " Billing Contact Details:[ ]*\n (?P.*)\n (?P.*)[ ]{2,}\((?P.*)\)\n (?P.*)\n(?: (?P.*)\n)?(?: (?P.*)\n)? (?P.*)\n (?P.*),(?P.*)\n (?P.*)\n Tel. (?P.*)", + # Whois.com + "billing-id:[ ]*(?P.*)\n(?:billing-organization:[ ]*(?P.*)\n)?billing-name:[ ]*(?P.*)\nbilling-street:[ ]*(?P.*)\nbilling-city:[ ]*(?P.*)\nbilling-zip:[ ]*(?P.*)\nbilling-country:[ ]*(?P.*)\n(?:billing-phone:[ ]*(?P.*)\n)?(?:billing-fax:[ ]*(?P.*)\n)?billing-email:[ ]*(?P.*)", + # InterNetworX + "Billing Contact:\n bill_org: (?P.*)\n bill_name: (?P.*)\n bill_email: (?P.*)\n bill_address: (?P
.*)\n bill_city: (?P.*)\n bill_state: (?P.*)\n bill_zip: (?P.*)\n bill_country: (?P.*)\n bill_phone: (?P.*)", + # Bellnames + "Billing Contact ID:(?P.*)\nBilling Contact Name:(?P.*)\n(?:Billing Contact Organization:(?P.*)\n)?Billing Contact Address1:(?P.*)\n(?:Billing Contact Address2:(?P.*)\n)?(?:Billing Contact Address3:(?P.*)\n)?Billing Contact City:(?P.*)\n(?:Billing Contact State/Province:(?P.*)\n)?(?:Billing Contact Postal Code:(?P.*)\n)?Billing Contact Country:(?P.*)\nBilling Contact Country Code:.*\nBilling Contact Phone Number:(?P.*)\n(?:Billing Contact Facsimile Number:(?P.*)\n)?Billing Contact Email:(?P.*)", + # .US, .biz (NeuStar), .buzz, .moe (Interlink Co. Ltd.) + "Billing contact:\n(?: (?P.+)\n)? (?P.+)\n (?P.+)\n (?P.+)\n (?P.+), (?P.+) (?P.+) (?P.+)\n Phone: (?P.*)\n Fax: (?P.*)\n", + # Fabulous.com + "Billing Contact Information :[ ]*\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n[ ]+(?P.*)\n\n", + # GAL Communication + "Billing Contact:\n Name: (?P.+)\n City: (?P.+)\n State: (?P.+)\n Country: (?P.+)\n", + # Akky (.com.mx) + "BILLING ID:(?P.+)\nBILLING Name:(?P.*)\n(?:BILLING Organization:(?P.*)\n)?BILLING Street1:(?P.+?)\n(?:BILLING Street2:(?P.+?)\n(?:BILLING Street3:(?P.+?)\n)?)?BILLING City:(?P.+)\nBILLING State:(?P.*)\nBILLING Postal Code:(?P.+)\nBILLING Country:(?P[A-Z]+)\nBILLING Phone:(?P.*?)\nBILLING Fax:(?P.*)\nBILLING Email:(?P.+)\n", + # Realtime Register + "Billing Contact\n NIC Handle \(if known\)\.+:(?P.*)\n \(I\)ndividual \(R\)ole\.+:(?P.*)\n Name \(Last, First\)\.+:(?P.*)\n Organization Name\.+:(?P.*)\n Street Address\.+:(?P.*)\n City\.+: (?P.*)\n State\.+: (?P.*)\n Postal Code\.+:(?P.*)\n Country\.+:(?P.*)\n Phone Number\.+:(?P.*)\n Fax Number\.+:(?P.*)\n E-Mailbox\.+:(?P.*)", + # .ai ] # Some registries use NIC handle references instead of directly listing contacts... 
nic_contact_references = {
- "registrant": [
- "registrant:\s*(?P<handle>.+)", # nic.at
- "owner-contact:\s*(?P<handle>.+)", # LCN.com
- "holder-c:\s*(?P<handle>.+)", # AFNIC
- "holder:\s*(?P<handle>.+)", # iis.se (they apparently want to be difficult, and won't give you contact info for the handle over their WHOIS service)
- ],
- "tech": [
- "tech-c:\s*(?P<handle>.+)", # nic.at, AFNIC, iis.se
- "technical-contact:\s*(?P<handle>.+)", # LCN.com
- "n\. \[Technical Contact\] (?P<handle>.+)\n", #.co.jp
- ],
- "admin": [
- "admin-c:\s*(?P<handle>.+)", # nic.at, AFNIC, iis.se
- "admin-contact:\s*(?P<handle>.+)", # LCN.com
- "m\. \[Administrative Contact\] (?P<handle>.+)\n", # .co.jp
- ],
- "billing": [
- "billing-c:\s*(?P<handle>.+)", # iis.se
- "billing-contact:\s*(?P<handle>.+)", # LCN.com
- ]
+ "registrant": [
+ "registrant:\s*(?P<handle>.+)", # nic.at
+ "owner-contact:\s*(?P<handle>.+)", # LCN.com
+ "holder-c:\s*(?P<handle>.+)", # AFNIC
+ "holder:\s*(?P<handle>.+)",
+ # iis.se (they apparently want to be difficult, and won't give you contact info for the handle over their WHOIS service)
+ ],
+ "tech": [
+ "tech-c:\s*(?P<handle>.+)", # nic.at, AFNIC, iis.se
+ "technical-contact:\s*(?P<handle>.+)", # LCN.com
+ "n\. \[Technical Contact\] (?P<handle>.+)\n", # .co.jp
+ ],
+ "admin": [
+ "admin-c:\s*(?P<handle>.+)", # nic.at, AFNIC, iis.se
+ "admin-contact:\s*(?P<handle>.+)", # LCN.com
+ "m\. 
\[Administrative Contact\] (?P<handle>.+)\n", # .co.jp
+ ],
+ "billing": [
+ "billing-c:\s*(?P<handle>.+)", # iis.se
+ "billing-contact:\s*(?P<handle>.+)", # LCN.com
+ ]
+}
+
+registrant_fallback_regexes = {
+ "handle": "Registrant[ \t\S]*ID:(?P<handle>.+)",
+ "name": "Registrant[ \t\S]*Name:(?P<name>.*)",
+ "organization": "Registrant[ \t\S]*Organization:(?P<organization>.*)",
+ "street": "Registrant[ \t\S]*Street:(?P<street>.*)",
+ "city": "Registrant[ \t\S]*City:(?P<city>.*)",
+ "state": "Registrant[ \t\S]*State:(?P<state>.*)",
+ "postalcode": "Registrant[ \t\S]*Postal Code:(?P<postalcode>.*)",
+ "country": "Registrant[ \t\S]*Country:(?P<country>.*)",
+ "phone": "Registrant[ \t\S]*Phone:(?P<phone>.*)",
+ "fax": "Registrant[ \t\S]*Fax:(?P<fax>.*)",
+ "email": "Registrant[ \t\S]*Email:(?P<email>.*)"
+}
+
+admin_fallback_regexes = {
+ "handle": "Admin[ \t\S]*ID:(?P<handle>.+)",
+ "name": "Admin[ \t\S]*Name:(?P<name>.*)",
+ "organization": "Admin[ \t\S]*Organization:(?P<organization>.*)",
+ "street": "Admin[ \t\S]*Street:(?P<street>.*)",
+ "city": "Admin[ \t\S]*City:(?P<city>.*)",
+ "state": "Admin[ \t\S]*State:(?P<state>.*)",
+ "postalcode": "Admin[ \t\S]*Postal Code:(?P<postalcode>.*)",
+ "country": "Admin[ \t\S]*Country:(?P<country>.*)",
+ "phone": "Admin[ \t\S]*Phone:(?P<phone>.*)",
+ "fax": "Admin[ \t\S]*Fax:(?P<fax>.*)",
+ "email": "Admin[ \t\S]*Email:(?P<email>.*)"
+}
+
+billing_fallback_regexes = {
+ "handle": "Billing[ \t\S]*ID:(?P<handle>.+)",
+ "name": "Billing[ \t\S]*Name:(?P<name>.*)",
+ "organization": "Billing[ \t\S]*Organization:(?P<organization>.*)",
+ "street": "Billing[ \t\S]*Street:(?P<street>.*)",
+ "city": "Billing[ \t\S]*City:(?P<city>.*)",
+ "state": "Billing[ \t\S]*State:(?P<state>.*)",
+ "postalcode": "Billing[ \t\S]*Postal Code:(?P<postalcode>.*)",
+ "country": "Billing[ \t\S]*Country:(?P<country>.*)",
+ "phone": "Billing[ \t\S]*Phone:(?P<phone>.*)",
+ "fax": "Billing[ \t\S]*Fax:(?P<fax>.*)",
+ "email": "Billing[ \t\S]*Email:(?P<email>.*)"
+}
+
+tech_fallback_regexes = {
+ "handle": "Tech[ \t\S]*ID:(?P<handle>.+)",
+ "name": "Tech[ \t\S]*Name:(?P<name>.*)",
+ "organization": "Tech[ \t\S]*Organization:(?P<organization>.*)",
+ "street": "Tech[ \t\S]*Street:(?P<street>.*)",
+ "city": "Tech[ \t\S]*City:(?P<city>.*)",
+ "state": "Tech[ \t\S]*State:(?P<state>.*)",
+ 
"postalcode": "Tech[ \t\S]*Postal Code:(?P<postalcode>.*)",
+ "country": "Tech[ \t\S]*Country:(?P<country>.*)",
+ "phone": "Tech[ \t\S]*Phone:(?P<phone>.*)",
+ "fax": "Tech[ \t\S]*Fax:(?P<fax>.*)",
+ "email": "Tech[ \t\S]*Email:(?P<email>.*)"
}

# Why do the below? The below is meant to handle with an edge case (issue #2) where a partial match followed
@@ -400,79 +584,89 @@ def allow_trailing_comma_dict(regexes):
billing_contact_regexes = [preprocess_regex(regex) for regex in billing_contact_regexes]

nic_contact_regexes = [
- "personname:\s*(?P<name>.+)\norganization:\s*(?P<organization>.+)\nstreet address:\s*(?P<street>.+)\npostal code:\s*(?P<postalcode>.+)\ncity:\s*(?P<city>.+)\ncountry:\s*(?P<country>.+)\n(?:phone:\s*(?P<phone>.+)\n)?(?:fax-no:\s*(?P<fax>.+)\n)?(?:e-mail:\s*(?P<email>.+)\n)?nic-hdl:\s*(?P<handle>.+)\nchanged:\s*(?P<changedate>.+)", # nic.at
- "contact-handle:[ ]*(?P<handle>.+)\ncontact:[ ]*(?P<name>.+)\n(?:organisation:[ ]*(?P<organization>.+)\n)?address:[ ]*(?P<street1>.+)\n(?:address:[ ]*(?P<street2>.+)\n)?(?:address:[ ]*(?P<street3>.+)\n)?(?:address:[ ]*(?P<street4>.+)\n)?address:[ ]*(?P<city>.+)\naddress:[ ]*(?P<state>.+)\naddress:[ ]*(?P<postalcode>.+)\naddress:[ ]*(?P<country>.+)\n(?:phone:[ ]*(?P<phone>.+)\n)?(?:fax:[ ]*(?P<fax>.+)\n)?(?:email:[ ]*(?P<email>.+)\n)?", # LCN.com
- "Contact Information:\na\. \[JPNIC Handle\] (?P<handle>.+)\nc\. \[Last, First\] (?P<lastname>.+), (?P<firstname>.+)\nd\. \[E-Mail\] (?P<email>.+)\ng\. \[Organization\] (?P<organization>.+)\nl\. \[Division\] (?P<division>.+)\nn\. \[Title\] (?P<title>.+)\no\. \[TEL\] (?P<phone>.+)\np\. \[FAX\] (?P<fax>.+)\ny\. \[Reply Mail\] .*\n\[Last Update\] (?P<changedate>.+) \(JST\)\n", # JPRS .co.jp contact handle lookup
- "person:\s*(?P<name>.+)\nnic-hdl:\s*(?P<handle>.+)\n", # .ie
- "nic-hdl:\s+(?P<handle>.+)\nperson:\s+(?P<name>.+)\n(?:e-mail:\s+(?P<email>.+)\n)?(?:address:\s+(?P<street1>.+?)(?:,+ (?P<street2>.+?)(?:,+ (?P<street3>.+?)(?:,+ (?P<street4>.+?)(?:,+ (?P<street5>.+?)(?:,+ (?P<street6>.+?)(?:,+ (?P<street7>.+?))?)?)?)?)?)?, (?P<city>.+), (?P<state>.+), (?P<country>.+)\n)?(?:phone:\s+(?P<phone>.+)\n)?(?:fax-no:\s+(?P<fax>.+)\n)?", # nic.ir, individual - this is a nasty one. 
- "nic-hdl:\s+(?P<handle>.+)\norg:\s+(?P<organization>.+)\n(?:e-mail:\s+(?P<email>.+)\n)?(?:address:\s+(?P<street1>.+?)(?:,+ (?P<street2>.+?)(?:,+ (?P<street3>.+?)(?:,+ (?P<street4>.+?)(?:,+ (?P<street5>.+?)(?:,+ (?P<street6>.+?)(?:,+ (?P<street7>.+?))?)?)?)?)?)?, (?P<city>.+), (?P<state>.+), (?P<country>.+)\n)?(?:phone:\s+(?P<phone>.+)\n)?(?:fax-no:\s+(?P<fax>.+)\n)?", # nic.ir, organization - "nic-hdl:\s*(?P<handle>.+)\ntype:\s*(?P<type>.+)\ncontact:\s*(?P<name>.+)\n(?:.+\n)*?(?:address:\s*(?P<street1>.+)\naddress:\s*(?P<street2>.+)\naddress:\s*(?P<street3>.+)\naddress:\s*(?P<country>.+)\n)?(?:phone:\s*(?P<phone>.+)\n)?(?:fax-no:\s*(?P<fax>.+)\n)?(?:.+\n)*?(?:e-mail:\s*(?P<email>.+)\n)?(?:.+\n)*?changed:\s*(?P<changedate>[0-9]{2}\/[0-9]{2}\/[0-9]{4}).*\n", # AFNIC madness without country field - "nic-hdl:\s*(?P<handle>.+)\ntype:\s*(?P<type>.+)\ncontact:\s*(?P<name>.+)\n(?:.+\n)*?(?:address:\s*(?P<street1>.+)\n)?(?:address:\s*(?P<street2>.+)\n)?(?:address:\s*(?P<street3>.+)\n)?(?:phone:\s*(?P<phone>.+)\n)?(?:fax-no:\s*(?P<fax>.+)\n)?(?:.+\n)*?(?:e-mail:\s*(?P<email>.+)\n)?(?:.+\n)*?changed:\s*(?P<changedate>[0-9]{2}\/[0-9]{2}\/[0-9]{4}).*\n", # AFNIC madness any country -at all- - "nic-hdl:\s*(?P<handle>.+)\ntype:\s*(?P<type>.+)\ncontact:\s*(?P<name>.+)\n(?:.+\n)*?(?:address:\s*(?P<street1>.+)\n)?(?:address:\s*(?P<street2>.+)\n)?(?:address:\s*(?P<street3>.+)\n)?(?:address:\s*(?P<street4>.+)\n)?country:\s*(?P<country>.+)\n(?:phone:\s*(?P<phone>.+)\n)?(?:fax-no:\s*(?P<fax>.+)\n)?(?:.+\n)*?(?:e-mail:\s*(?P<email>.+)\n)?(?:.+\n)*?changed:\s*(?P<changedate>[0-9]{2}\/[0-9]{2}\/[0-9]{4}).*\n", # AFNIC madness with country field + "personname:\s*(?P<name>.+)\norganization:\s*(?P<organization>.+)\nstreet address:\s*(?P<street>.+)\npostal code:\s*(?P<postalcode>.+)\ncity:\s*(?P<city>.+)\ncountry:\s*(?P<country>.+)\n(?:phone:\s*(?P<phone>.+)\n)?(?:fax-no:\s*(?P<fax>.+)\n)?(?:e-mail:\s*(?P<email>.+)\n)?nic-hdl:\s*(?P<handle>.+)\nchanged:\s*(?P<changedate>.+)", + # nic.at + 
"contact-handle:[ ]*(?P<handle>.+)\ncontact:[ ]*(?P<name>.+)\n(?:organisation:[ ]*(?P<organization>.+)\n)?address:[ ]*(?P<street1>.+)\n(?:address:[ ]*(?P<street2>.+)\n)?(?:address:[ ]*(?P<street3>.+)\n)?(?:address:[ ]*(?P<street4>.+)\n)?address:[ ]*(?P<city>.+)\naddress:[ ]*(?P<state>.+)\naddress:[ ]*(?P<postalcode>.+)\naddress:[ ]*(?P<country>.+)\n(?:phone:[ ]*(?P<phone>.+)\n)?(?:fax:[ ]*(?P<fax>.+)\n)?(?:email:[ ]*(?P<email>.+)\n)?", + # LCN.com + "Contact Information:\na\. \[JPNIC Handle\] (?P<handle>.+)\nc\. \[Last, First\] (?P<lastname>.+), (?P<firstname>.+)\nd\. \[E-Mail\] (?P<email>.+)\ng\. \[Organization\] (?P<organization>.+)\nl\. \[Division\] (?P<division>.+)\nn\. \[Title\] (?P<title>.+)\no\. \[TEL\] (?P<phone>.+)\np\. \[FAX\] (?P<fax>.+)\ny\. \[Reply Mail\] .*\n\[Last Update\] (?P<changedate>.+) \(JST\)\n", + # JPRS .co.jp contact handle lookup + "person:\s*(?P<name>.+)\nnic-hdl:\s*(?P<handle>.+)\n", # .ie + "nic-hdl:\s+(?P<handle>.+)\nperson:\s+(?P<name>.+)\n(?:e-mail:\s+(?P<email>.+)\n)?(?:address:\s+(?P<street1>.+?)(?:,+ (?P<street2>.+?)(?:,+ (?P<street3>.+?)(?:,+ (?P<street4>.+?)(?:,+ (?P<street5>.+?)(?:,+ (?P<street6>.+?)(?:,+ (?P<street7>.+?))?)?)?)?)?)?, (?P<city>.+), (?P<state>.+), (?P<country>.+)\n)?(?:phone:\s+(?P<phone>.+)\n)?(?:fax-no:\s+(?P<fax>.+)\n)?", + # nic.ir, individual - this is a nasty one. 
+ "nic-hdl:\s+(?P<handle>.+)\norg:\s+(?P<organization>.+)\n(?:e-mail:\s+(?P<email>.+)\n)?(?:address:\s+(?P<street1>.+?)(?:,+ (?P<street2>.+?)(?:,+ (?P<street3>.+?)(?:,+ (?P<street4>.+?)(?:,+ (?P<street5>.+?)(?:,+ (?P<street6>.+?)(?:,+ (?P<street7>.+?))?)?)?)?)?)?, (?P<city>.+), (?P<state>.+), (?P<country>.+)\n)?(?:phone:\s+(?P<phone>.+)\n)?(?:fax-no:\s+(?P<fax>.+)\n)?", + # nic.ir, organization + "nic-hdl:\s*(?P<handle>.+)\ntype:\s*(?P<type>.+)\ncontact:\s*(?P<name>.+)\n(?:.+\n)*?(?:address:\s*(?P<street1>.+)\naddress:\s*(?P<street2>.+)\naddress:\s*(?P<street3>.+)\naddress:\s*(?P<country>.+)\n)?(?:phone:\s*(?P<phone>.+)\n)?(?:fax-no:\s*(?P<fax>.+)\n)?(?:.+\n)*?(?:e-mail:\s*(?P<email>.+)\n)?(?:.+\n)*?changed:\s*(?P<changedate>[0-9]{2}\/[0-9]{2}\/[0-9]{4}).*\n", + # AFNIC madness without country field + "nic-hdl:\s*(?P<handle>.+)\ntype:\s*(?P<type>.+)\ncontact:\s*(?P<name>.+)\n(?:.+\n)*?(?:address:\s*(?P<street1>.+)\n)?(?:address:\s*(?P<street2>.+)\n)?(?:address:\s*(?P<street3>.+)\n)?(?:phone:\s*(?P<phone>.+)\n)?(?:fax-no:\s*(?P<fax>.+)\n)?(?:.+\n)*?(?:e-mail:\s*(?P<email>.+)\n)?(?:.+\n)*?changed:\s*(?P<changedate>[0-9]{2}\/[0-9]{2}\/[0-9]{4}).*\n", + # AFNIC madness any country -at all- + "nic-hdl:\s*(?P<handle>.+)\ntype:\s*(?P<type>.+)\ncontact:\s*(?P<name>.+)\n(?:.+\n)*?(?:address:\s*(?P<street1>.+)\n)?(?:address:\s*(?P<street2>.+)\n)?(?:address:\s*(?P<street3>.+)\n)?(?:address:\s*(?P<street4>.+)\n)?country:\s*(?P<country>.+)\n(?:phone:\s*(?P<phone>.+)\n)?(?:fax-no:\s*(?P<fax>.+)\n)?(?:.+\n)*?(?:e-mail:\s*(?P<email>.+)\n)?(?:.+\n)*?changed:\s*(?P<changedate>[0-9]{2}\/[0-9]{2}\/[0-9]{4}).*\n", + # AFNIC madness with country field ] abbreviated_organization_regexes = ( - r"(?:^|\s|,)ltd\.?($|\s)", - r"(?:^|\s|,)co\.?($|\s)", - r"(?:^|\s|,)corp\.?($|\s)", - r"(?:^|\s|,)inc\.?($|\s)", - r"(?:^|\s|,)s\.?p\.?a\.?($|\s)", - r"(?:^|\s|,)s\.?(c\.?)?r\.?l\.?($|\s)", - r"(?:^|\s|,)s\.?a\.?s\.?($|\s)", - r"(?:^|\s|,)a\.?g\.?($|\s)", - r"(?:^|\s|,)n\.?v\.?($|\s)", - 
r"(?:^|\s|,)b\.?v\.?($|\s)", - r"(?:^|\s|,)p\.?t\.?y\.?($|\s)", - r"(?:^|\s|,)p\.?l\.?c\.?($|\s)", - r"(?:^|\s|,)v\.?o\.?f\.?($|\s)", - r"(?:^|\s|,)b\.?v\.?b\.?a\.?($|\s)", - r"(?:^|\s|,)g\.?m\.?b\.?h\.?($|\s)", - r"(?:^|\s|,)s\.?a\.?r\.?l\.?($|\s)", - r"(?:^|\s|,)g\.?b\.?r\.?($|\s)", - r"(?:^|\s|,)s\.?r\.?o\.?($|\s)", + r"(?:^|\s|,)ltd\.?($|\s)", + r"(?:^|\s|,)co\.?($|\s)", + r"(?:^|\s|,)corp\.?($|\s)", + r"(?:^|\s|,)inc\.?($|\s)", + r"(?:^|\s|,)s\.?p\.?a\.?($|\s)", + r"(?:^|\s|,)s\.?(c\.?)?r\.?l\.?($|\s)", + r"(?:^|\s|,)s\.?a\.?s\.?($|\s)", + r"(?:^|\s|,)a\.?g\.?($|\s)", + r"(?:^|\s|,)n\.?v\.?($|\s)", + r"(?:^|\s|,)b\.?v\.?($|\s)", + r"(?:^|\s|,)p\.?t\.?y\.?($|\s)", + r"(?:^|\s|,)p\.?l\.?c\.?($|\s)", + r"(?:^|\s|,)v\.?o\.?f\.?($|\s)", + r"(?:^|\s|,)b\.?v\.?b\.?a\.?($|\s)", + r"(?:^|\s|,)g\.?m\.?b\.?h\.?($|\s)", + r"(?:^|\s|,)s\.?a\.?r\.?l\.?($|\s)", + r"(?:^|\s|,)g\.?b\.?r\.?($|\s)", + r"(?:^|\s|,)s\.?r\.?o\.?($|\s)", ) organization_regexes = ( - r"(?:^|\s|,)limited\.?($|\s|,)", - r"(?:^|\s|,)holdings\.?($|\s|,)", - r"(?:^|\s|,)(?:in)?corporat(?:ed?|ion)\.?($|\s|,)", - r"(?:^|\s|,)company\.?($|\s|,)", - r"(?:^|\s|,)operations\.?($|\s|,)", - r"(?:^|\s|,)association\.?($|\s|,)", - r"(?:^|\s|,)council\.?($|\s|,)", - r"(?:^|\s|,)university\.?($|\s|,)", - r"(?:^|\s|,)college\.?($|\s|,)", - r"(?:^|\s|,)services?\.?($|\s|,)", - r"(?:^|\s|,)cabinet\.?($|\s|,)", - r"(?:^|\s|,)billing\.?($|\s|,)", - r"(?:^|\s|,)administration\.?($|\s|,)", + r"(?:^|\s|,)limited\.?($|\s|,)", + r"(?:^|\s|,)holdings\.?($|\s|,)", + r"(?:^|\s|,)(?:in)?corporat(?:ed?|ion)\.?($|\s|,)", + r"(?:^|\s|,)company\.?($|\s|,)", + r"(?:^|\s|,)operations\.?($|\s|,)", + r"(?:^|\s|,)association\.?($|\s|,)", + r"(?:^|\s|,)council\.?($|\s|,)", + r"(?:^|\s|,)university\.?($|\s|,)", + r"(?:^|\s|,)college\.?($|\s|,)", + r"(?:^|\s|,)services?\.?($|\s|,)", + r"(?:^|\s|,)cabinet\.?($|\s|,)", + r"(?:^|\s|,)billing\.?($|\s|,)", + r"(?:^|\s|,)administration\.?($|\s|,)", ) known_abbreviations = 
allow_trailing_comma_dict({ - "GPO Box": r"gpo box", - "OVH": r"^ovh\.?$", - "GmbH": r"^gmbh$", - "Inc.": r"^inc\.?$", - "of": r"^of$", - "Ltd.": r"^ltd\.?$", - "Pty": r"^pty\.?$", - "Co.": r"^co\.$", - "SARL": r"^sarl$", - "d/b/a": r"^(?:d\/b\/a|dba)$", + "GPO Box": r"gpo box", + "OVH": r"^ovh\.?$", + "GmbH": r"^gmbh$", + "Inc.": r"^inc\.?$", + "of": r"^of$", + "Ltd.": r"^ltd\.?$", + "Pty": r"^pty\.?$", + "Co.": r"^co\.$", + "SARL": r"^sarl$", + "d/b/a": r"^(?:d\/b\/a|dba)$", }) role_regexes = ( - r"(?:^|\s|,)administrator\.?($|\s|,)", + r"(?:^|\s|,)administrator\.?($|\s|,)", ) country_regexes = [r"(?:\s|,)" + dotify(country_code.upper()) + r"($|\s)" for country_code in countries.keys()] -for key in ('id', 'status', 'creation_date', 'expiration_date', 'updated_date', 'registrar', 'whois_server', 'nameservers', 'emails'): - grammar["_data"][key] = precompile_regexes(grammar["_data"][key], re.IGNORECASE) +for key in ( + 'id', 'status', 'creation_date', 'expiration_date', 'updated_date', 'registrar', 'whois_server', 'nameservers', + 'emails'): + grammar["_data"][key] = precompile_regexes(grammar["_data"][key], re.IGNORECASE) for key in ('registrant', 'tech', 'admin', 'billing'): - nic_contact_references[key] = precompile_regexes(nic_contact_references[key]) - + nic_contact_references[key] = precompile_regexes(nic_contact_references[key]) + grammar["_dateformats"] = precompile_regexes(grammar["_dateformats"], re.IGNORECASE) registrant_regexes = precompile_regexes(registrant_regexes) @@ -494,861 +688,958 @@ def allow_trailing_comma_dict(regexes): comma_without_space = re.compile(r',([a-z])', re.IGNORECASE) if sys.version_info < (3, 0): - def is_string(data): - """Test for string with support for python 2.""" - return isinstance(data, basestring) + def is_string(data): + """Test for string with support for python 2.""" + return isinstance(data, basestring) else: - def is_string(data): - """Test for string with support for python 3.""" - return isinstance(data, str) + 
def is_string(data): + """Test for string with support for python 3.""" + return isinstance(data, str) + def filter_characters(string, delete_characters): - return ''.join([char for char in string if char not in delete_characters]) + return ''.join([char for char in string if char not in delete_characters]) + def parse_raw_whois(raw_data, normalized=None, never_query_handles=True, handle_server=""): - normalized = normalized or [] - data = {} - - raw_data = [segment.replace("\r", "") for segment in raw_data] # Carriage returns are the devil - - for segment in raw_data: - for rule_key, rule_regexes in grammar['_data'].items(): - if (rule_key in data) == False: - for line in segment.splitlines(): - for regex in rule_regexes: - result = re.search(regex, line) - - if result is not None: - val = result.group("val").strip() - if val != "": - try: - data[rule_key].append(val) - except KeyError as e: - data[rule_key] = [val] - - # Whois.com is a bit special... Fabulous.com also seems to use this format. As do some others. 
- match = re.search("^\s?Name\s?[Ss]ervers:?\s*\n((?:\s*.+\n)+?\s?)(?:\n|$)", segment, re.MULTILINE) - if match is not None: - chunk = match.group(1) - for match in re.findall("[ ]*(.+)\n", chunk): - if match.strip() != "": - if not re.match("^[a-zA-Z .]+:", match): - try: - data["nameservers"].append(match.strip()) - except KeyError as e: - data["nameservers"] = [match.strip()] - # Nominet also needs some special attention - match = re.search(" Registrar:\n (.+)\n", segment) - if match is not None: - data["registrar"] = [match.group(1).strip()] - match = re.search(" Registration status:\n (.+)\n", segment) - if match is not None: - data["status"] = [match.group(1).strip()] - match = re.search(" Name servers:\n([\s\S]*?\n)\n", segment) - if match is not None: - chunk = match.group(1) - for match in re.findall(" (.+)\n", chunk): - match = match.split()[0] - try: - data["nameservers"].append(match.strip()) - except KeyError as e: - data["nameservers"] = [match.strip()] - # janet (.ac.uk) is kinda like Nominet, but also kinda not - match = re.search("Registered By:\n\t(.+)\n", segment) - if match is not None: - data["registrar"] = [match.group(1).strip()] - match = re.search("Entry created:\n\t(.+)\n", segment) - if match is not None: - data["creation_date"] = [match.group(1).strip()] - match = re.search("Renewal date:\n\t(.+)\n", segment) - if match is not None: - data["expiration_date"] = [match.group(1).strip()] - match = re.search("Entry updated:\n\t(.+)\n", segment) - if match is not None: - data["updated_date"] = [match.group(1).strip()] - match = re.search("Servers:([\s\S]*?\n)\n", segment) - if match is not None: - chunk = match.group(1) - for match in re.findall("\t(.+)\n", chunk): - match = match.split()[0] - try: - data["nameservers"].append(match.strip()) - except KeyError as e: - data["nameservers"] = [match.strip()] - # .am plays the same game - match = re.search(" DNS servers:([\s\S]*?\n)\n", segment) - if match is not None: - chunk = match.group(1) - 
for match in re.findall(" (.+)\n", chunk): - match = match.split()[0] - try: - data["nameservers"].append(match.strip()) - except KeyError as e: - data["nameservers"] = [match.strip()] - # SIDN isn't very standard either. And EURid uses a similar format. - match = re.search("Registrar:\n\s+(?:Name:\s*)?(\S.*)", segment) - if match is not None: - data["registrar"].insert(0, match.group(1).strip()) - match = re.search("(?:Domain nameservers|Name servers):([\s\S]*?\n)\n", segment) - if match is not None: - chunk = match.group(1) - for match in re.findall("\s+?(.+)\n", chunk): - match = match.split()[0] - # Prevent nameserver aliases from being picked up. - if not match.startswith("[") and not match.endswith("]"): - try: - data["nameservers"].append(match.strip()) - except KeyError as e: - data["nameservers"] = [match.strip()] - # The .ie WHOIS server puts ambiguous status information in an unhelpful order - match = re.search('ren-status:\s*(.+)', segment) - if match is not None: - data["status"].insert(0, match.group(1).strip()) - # nic.it gives us the registrar in a multi-line format... - match = re.search('Registrar\n Organization: (.+)\n', segment) - if match is not None: - data["registrar"] = [match.group(1).strip()] - # HKDNR (.hk) provides a weird nameserver format with too much whitespace - match = re.search("Name Servers Information:\n\n([\s\S]*?\n)\n", segment) - if match is not None: - chunk = match.group(1) - for match in re.findall("(.+)\n", chunk): - match = match.split()[0] - try: - data["nameservers"].append(match.strip()) - except KeyError as e: - data["nameservers"] = [match.strip()] - # ... and again for TWNIC. 
- match = re.search(" Domain servers in listed order:\n([\s\S]*?\n)\n", segment) - if match is not None: - chunk = match.group(1) - for match in re.findall(" (.+)\n", chunk): - match = match.split()[0] - try: - data["nameservers"].append(match.strip()) - except KeyError as e: - data["nameservers"] = [match.strip()] - - - data["contacts"] = parse_registrants(raw_data, never_query_handles, handle_server) - - # Parse dates - try: - data['expiration_date'] = remove_duplicates(data['expiration_date']) - data['expiration_date'] = parse_dates(data['expiration_date']) - except KeyError as e: - pass # Not present - - try: - data['creation_date'] = remove_duplicates(data['creation_date']) - data['creation_date'] = parse_dates(data['creation_date']) - except KeyError as e: - pass # Not present - - try: - data['updated_date'] = remove_duplicates(data['updated_date']) - data['updated_date'] = parse_dates(data['updated_date']) - except KeyError as e: - pass # Not present - - try: - data['nameservers'] = remove_suffixes(data['nameservers']) - data['nameservers'] = remove_duplicates([ns.rstrip(".") for ns in data['nameservers']]) - except KeyError as e: - pass # Not present - - try: - data['emails'] = remove_duplicates(data['emails']) - except KeyError as e: - pass # Not present - - try: - data['registrar'] = remove_duplicates(data['registrar']) - except KeyError as e: - pass # Not present - - known_emails = [] - for contact in ("registrant", "tech", "admin", "billing"): - if data["contacts"][contact] is not None: - try: - known_emails.append(data["contacts"][contact]["email"]) - except KeyError as e: - pass # No e-mail recorded for this contact... 
- - # Remove e-mail addresses if they are already listed for any of the contacts - try: - data['emails'] = [email for email in data["emails"] if email not in known_emails] - except KeyError as e: - pass # Not present - - for key in list(data.keys()): - if data[key] is None or len(data[key]) == 0: - del data[key] - - data["raw"] = raw_data - - if normalized != []: - data = normalize_data(data, normalized) - - return data + normalized = normalized or [] + data = {} + + raw_data = [segment.replace("\r", "") for segment in raw_data] # Carriage returns are the devil + + for segment in raw_data: + for rule_key, rule_regexes in grammar['_data'].items(): + if (rule_key in data) == False: + for line in segment.splitlines(): + for regex in rule_regexes: + result = re.search(regex, line) + + if result is not None: + val = result.group("val").strip() + if val != "": + try: + data[rule_key].append(val) + except KeyError as e: + data[rule_key] = [val] + + # Whois.com is a bit special... Fabulous.com also seems to use this format. As do some others. 
+ match = re.search("^\s?Name\s?[Ss]ervers:?\s*\n((?:\s*.+\n)+?\s?)(?:\n|$)", segment, re.MULTILINE) + if match is not None: + chunk = match.group(1) + for match in re.findall("[ ]*(.+)\n", chunk): + if match.strip() != "": + if not re.match("^[a-zA-Z .]+:", match): + try: + data["nameservers"].append(match.strip()) + except KeyError as e: + data["nameservers"] = [match.strip()] + # Nominet also needs some special attention + match = re.search(" Registrar:\n (.+)\n", segment) + if match is not None: + data["registrar"] = [match.group(1).strip()] + match = re.search(" Registration status:\n (.+)\n", segment) + if match is not None: + data["status"] = [match.group(1).strip()] + match = re.search(" Name servers:\n([\s\S]*?\n)\n", segment) + if match is not None: + chunk = match.group(1) + for match in re.findall(" (.+)\n", chunk): + match = match.split()[0] + try: + data["nameservers"].append(match.strip()) + except KeyError as e: + data["nameservers"] = [match.strip()] + # janet (.ac.uk) is kinda like Nominet, but also kinda not + match = re.search("Registered By:\n\t(.+)\n", segment) + if match is not None: + data["registrar"] = [match.group(1).strip()] + match = re.search("Entry created:\n\t(.+)\n", segment) + if match is not None: + data["creation_date"] = [match.group(1).strip()] + match = re.search("Renewal date:\n\t(.+)\n", segment) + if match is not None: + data["expiration_date"] = [match.group(1).strip()] + match = re.search("Entry updated:\n\t(.+)\n", segment) + if match is not None: + data["updated_date"] = [match.group(1).strip()] + match = re.search("Servers:([\s\S]*?\n)\n", segment) + if match is not None: + chunk = match.group(1) + for match in re.findall("\t(.+)\n", chunk): + match = match.split()[0] + try: + data["nameservers"].append(match.strip()) + except KeyError as e: + data["nameservers"] = [match.strip()] + # .am plays the same game + match = re.search(" DNS servers:([\s\S]*?\n)\n", segment) + if match is not None: + chunk = match.group(1) + 
for match in re.findall(" (.+)\n", chunk): + match = match.split()[0] + try: + data["nameservers"].append(match.strip()) + except KeyError as e: + data["nameservers"] = [match.strip()] + # SIDN isn't very standard either. And EURid uses a similar format. + match = re.search("Registrar:\n\s+(?:Name:\s*)?(\S.*)", segment) + if match is not None: + data["registrar"] = [match.group(1).strip()] + match = re.search("(?:Domain nameservers|Name servers):([\s\S]*?\n)\n", segment) + if match is not None: + chunk = match.group(1) + for match in re.findall("\s+?(.+)\n", chunk): + match = match.split()[0] + # Prevent nameserver aliases from being picked up. + if not match.startswith("[") and not match.endswith("]"): + try: + data["nameservers"].append(match.strip()) + except KeyError as e: + data["nameservers"] = [match.strip()] + # The .ie WHOIS server puts ambiguous status information in an unhelpful order + match = re.search('ren-status:\s*(.+)', segment) + if match is not None: + data["status"].insert(0, match.group(1).strip()) + # nic.it gives us the registrar in a multi-line format... + match = re.search('Registrar\n Organization: (.+)\n', segment) + if match is not None: + data["registrar"] = [match.group(1).strip()] + # HKDNR (.hk) provides a weird nameserver format with too much whitespace + match = re.search("Name Servers Information:\n\n([\s\S]*?\n)\n", segment) + if match is not None: + chunk = match.group(1) + for match in re.findall("(.+)\n", chunk): + match = match.split()[0] + try: + data["nameservers"].append(match.strip()) + except KeyError as e: + data["nameservers"] = [match.strip()] + # ... and again for TWNIC. 
+ match = re.search(" Domain servers in listed order:\n([\s\S]*?\n)\n", segment) + if match is not None: + chunk = match.group(1) + for match in re.findall(" (.+)\n", chunk): + match = match.split()[0] + try: + data["nameservers"].append(match.strip()) + except KeyError as e: + data["nameservers"] = [match.strip()] + + data["contacts"] = parse_registrants(raw_data, never_query_handles, handle_server) + + # Parse dates + try: + data['expiration_date'] = remove_duplicates(data['expiration_date']) + data['expiration_date'] = parse_dates(data['expiration_date']) + except KeyError as e: + pass # Not present + + try: + data['creation_date'] = remove_duplicates(data['creation_date']) + data['creation_date'] = parse_dates(data['creation_date']) + except KeyError as e: + pass # Not present + + try: + data['updated_date'] = remove_duplicates(data['updated_date']) + data['updated_date'] = parse_dates(data['updated_date']) + except KeyError as e: + pass # Not present + + try: + data['nameservers'] = remove_suffixes(data['nameservers']) + data['nameservers'] = remove_duplicates([ns.rstrip(".") for ns in data['nameservers']]) + except KeyError as e: + pass # Not present + + try: + data['emails'] = remove_duplicates(data['emails']) + except KeyError as e: + pass # Not present + + try: + data['registrar'] = remove_duplicates(data['registrar']) + except KeyError as e: + pass # Not present + + known_emails = [] + for contact in ("registrant", "tech", "admin", "billing"): + if data["contacts"][contact] is not None: + try: + known_emails.append(data["contacts"][contact]["email"]) + except KeyError as e: + pass # No e-mail recorded for this contact... 
+ + # Remove e-mail addresses if they are already listed for any of the contacts + try: + data['emails'] = [email for email in data["emails"] if email not in known_emails] + except KeyError as e: + pass # Not present + + for key in list(data.keys()): + if data[key] is None or len(data[key]) == 0: + del data[key] + + data["raw"] = raw_data + + if normalized != []: + data = normalize_data(data, normalized) + + return data + def normalize_data(data, normalized): - for key in ("nameservers", "emails", "whois_server"): - if key in data and data[key] is not None and (normalized == True or key in normalized): - if is_string(data[key]): - data[key] = data[key].lower() - else: - data[key] = [item.lower() for item in data[key]] - - for key, threshold in (("registrar", 4), ("status", 3)): - if key == "registrar": - ignore_nic = True - else: - ignore_nic = False - if key in data and data[key] is not None and (normalized == True or key in normalized): - if is_string(data[key]): - data[key] = normalize_name(data[key], abbreviation_threshold=threshold, length_threshold=1, ignore_nic=ignore_nic) - else: - data[key] = [normalize_name(item, abbreviation_threshold=threshold, length_threshold=1, ignore_nic=ignore_nic) for item in data[key]] - - for contact_type, contact in data['contacts'].items(): - if contact is not None: - if 'country' in contact and contact['country'] in countries: - contact['country'] = countries[contact['country']] - if 'city' in contact and contact['city'] in airports: - contact['city'] = airports[contact['city']] - if 'country' in contact and 'state' in contact: - for country, source in (("united states", states_us), ("australia", states_au), ("canada", states_ca)): - if country in contact["country"].lower() and contact["state"] in source: - contact["state"] = source[contact["state"]] - - - # Some registries (eg. 
Instra via .ai) may duplicate the name of a person into the organization field - if 'name' in contact and 'organization' in contact and contact['name'] == contact['organization']: - del contact['organization'] - - new_organization_lines = [] - new_name_lines = [] - - for key in list(contact.keys()): - # First deduplication pass - if is_string(contact[key]): - if key in ('organization', 'name'): - contact[key] = deduplicate(contact[key], fuzzy=True) - else: - contact[key] = deduplicate(contact[key]) - - if 'name' in contact: - name_lines = [x.strip() for x in contact["name"].splitlines()] - else: - name_lines = [] - - if 'organization' in contact: - organization_lines = [x.strip() for x in contact["organization"].splitlines()] - else: - organization_lines = [] - - # Move names that look like organizations, to the organization field - for i, line in enumerate(name_lines): - if 'type' in contact and contact['type'].lower() == "person": - # To deal with sole proprietors who name their company after themselves - is_organization = is_organization_name(line) - else: - is_organization = is_organization_name(line) or is_fuzzy_duplicate(line, organization_lines) - - if is_organization: - if "," in line: - name_words = re.split(name_separators, line) - if is_full_incorporation_form(name_words[0]): - line = reverse_name_comma(line) - - new_organization_lines.append(line) - del name_lines[i] - - # ... and move organizations that look like names, to the name field. 
- for i, line in enumerate(organization_lines): - is_name = is_person_name(line) - is_organization = is_organization_name(line) - - if is_name == True and is_organization == False: - new_name_lines.append(line) - del organization_lines[i] - - combined_name_lines = name_lines + new_name_lines - combined_organization_lines = new_organization_lines + organization_lines - - if len(combined_name_lines) > 0: - contact["name"] = "\n".join(combined_name_lines) - elif 'name' in contact: - del contact["name"] - - if len(combined_organization_lines) > 0: - contact["organization"] = "\n".join(combined_organization_lines) - elif 'organization' in contact: - del contact["organization"] - - new_roles = [] - - if 'name' in contact: - # Check whether the name is reversed; first name last, last name first. - names = contact['name'].splitlines() - unswapped_names = [] - - for name in names: - if "," in name: - name = reverse_name_comma(name) - else: - # Split the name into normalized (ie. alpha-only) 'words' for comparison. We only care about ASCII, as our first-name - # list currently only contains English names. - name_words = [filter_characters(segment, non_name_characters) for segment in name.split()] - - if len(name_words) > 1 and is_first_name(name_words[-1]) and not is_first_name(name_words[0]): - # The last 'word' was in the common first names, but the first one was not. Likely swapped around. 
- name = reverse_name(name) - - if is_role(name): - new_roles.append(name) - else: - unswapped_names.append(name) - - if len(unswapped_names) > 0: - contact['name'] = "\n".join(unswapped_names) - else: - del contact['name'] - - if 'organization' in contact: - organizations = contact['organization'].splitlines() - new_organizations = [] - - for organization in organizations: - if is_role(organization): - new_roles.append(organization) - else: - new_organizations.append(organization) - - if len(new_organizations) > 0: - contact['organization'] = "\n".join(new_organizations) - else: - del contact['organization'] - - if 'street' in contact: - streets = contact['street'].splitlines() - - if is_role(streets[0]): - new_roles.append(streets[0]) - streets = streets[1:] - - contact['street'] = "\n".join(streets) - - if 'role' in contact: - existing_roles = contact['role'].splitlines() - else: - existing_roles = [] - - if len(new_roles) > 0: - contact['role'] = "\n".join(new_roles + existing_roles) - - if "street" in contact: - lines = [x.strip() for x in contact["street"].splitlines()] - if len(lines) > 1: - if is_organization_name(lines[0], include_countries=False): - if "organization" in contact: - organizations = contact["organization"].splitlines() - else: - organizations = [] - - contact["organization"] = "\n".join([lines[0]] + organizations) - contact["street"] = "\n".join(lines[1:]) - - if 'organization' in contact: - contact['organization'] = re.sub(comma_without_space, r", \1", contact['organization']) - - for key in ("email",): - if key in contact and contact[key] is not None and (normalized == True or key in normalized): - if is_string(contact[key]): - contact[key] = contact[key].lower() - else: - contact[key] = [item.lower() for item in contact[key]] - - for key in ("street",): - if key in contact and contact[key] is not None and (normalized == True or key in normalized): - for phrase in ("GPO Box",): - regex = known_abbreviations[phrase] - contact[key] = 
re.sub(regex, phrase, contact[key]) - - for key in ("name", "street"): - if key in contact and contact[key] is not None and (normalized == True or key in normalized): - contact[key] = normalize_name(contact[key], abbreviation_threshold=3) - - for key in ("role", "city", "organization", "state", "country"): - if key in contact and contact[key] is not None and (normalized == True or key in normalized): - contact[key] = normalize_name(contact[key], abbreviation_threshold=3, length_threshold=3, check_known_incorrect=True) - - for key in list(contact.keys()): - # Certain registries like .co.th have HTML entities in their WHOIS data... - if is_string(contact[key]): - contact[key] = contact[key].replace("<", "<") - contact[key] = contact[key].replace(">", ">") - contact[key] = contact[key].replace(" ", " ") - contact[key] = contact[key].replace("&", "&") - - contact[key] = contact[key].strip(", ") - contact[key] = re.sub(duplicate_spaces, " ", contact[key]) - - # Second deduplication pass - if key in ('organization', 'name'): - contact[key] = deduplicate(contact[key], fuzzy=True) - else: - contact[key] = deduplicate(contact[key]) - - if contact[key] == "-" or contact[key].lower() == "n/a" or contact[key].lower() == "null": - del contact[key] - - if contact is not None and len(contact) == 0: - # We don't have any actual data. 
- data['contacts'][contact_type] = None - return data + for key in ("nameservers", "emails", "whois_server"): + if key in data and data[key] is not None and (normalized == True or key in normalized): + if is_string(data[key]): + data[key] = data[key].lower() + else: + data[key] = [item.lower() for item in data[key]] + + for key, threshold in (("registrar", 4), ("status", 3)): + if key == "registrar": + ignore_nic = True + else: + ignore_nic = False + if key in data and data[key] is not None and (normalized == True or key in normalized): + if is_string(data[key]): + data[key] = normalize_name(data[key], abbreviation_threshold=threshold, length_threshold=1, + ignore_nic=ignore_nic) + else: + data[key] = [ + normalize_name(item, abbreviation_threshold=threshold, length_threshold=1, ignore_nic=ignore_nic) + for item in data[key]] + + for contact_type, contact in data['contacts'].items(): + if contact is not None: + if 'country' in contact and contact['country'] in countries: + contact['country'] = countries[contact['country']] + if 'city' in contact and contact['city'] in airports: + contact['city'] = airports[contact['city']] + if 'country' in contact and 'state' in contact: + for country, source in (("united states", states_us), ("australia", states_au), ("canada", states_ca)): + if country in contact["country"].lower() and contact["state"] in source: + contact["state"] = source[contact["state"]] + + # Some registries (eg. 
Instra via .ai) may duplicate the name of a person into the organization field + if 'name' in contact and 'organization' in contact and contact['name'] == contact['organization']: + del contact['organization'] + + new_organization_lines = [] + new_name_lines = [] + + for key in list(contact.keys()): + # First deduplication pass + if is_string(contact[key]): + if key in ('organization', 'name'): + contact[key] = deduplicate(contact[key], fuzzy=True) + else: + contact[key] = deduplicate(contact[key]) + + if 'name' in contact: + name_lines = [x.strip() for x in contact["name"].splitlines()] + else: + name_lines = [] + + if 'organization' in contact: + organization_lines = [x.strip() for x in contact["organization"].splitlines()] + else: + organization_lines = [] + + # Move names that look like organizations, to the organization field + for i, line in enumerate(name_lines): + if 'type' in contact and contact['type'].lower() == "person": + # To deal with sole proprietors who name their company after themselves + is_organization = is_organization_name(line) + else: + is_organization = is_organization_name(line) or is_fuzzy_duplicate(line, organization_lines) + + if is_organization: + if "," in line: + name_words = re.split(name_separators, line) + if is_full_incorporation_form(name_words[0]): + line = reverse_name_comma(line) + + new_organization_lines.append(line) + del name_lines[i] + + # ... and move organizations that look like names, to the name field. 
+ for i, line in enumerate(organization_lines): + is_name = is_person_name(line) + is_organization = is_organization_name(line) + + if is_name == True and is_organization == False: + new_name_lines.append(line) + del organization_lines[i] + + combined_name_lines = name_lines + new_name_lines + combined_organization_lines = new_organization_lines + organization_lines + + if len(combined_name_lines) > 0: + contact["name"] = "\n".join(combined_name_lines) + elif 'name' in contact: + del contact["name"] + + if len(combined_organization_lines) > 0: + contact["organization"] = "\n".join(combined_organization_lines) + elif 'organization' in contact: + del contact["organization"] + + new_roles = [] + + if 'name' in contact: + # Check whether the name is reversed; first name last, last name first. + names = contact['name'].splitlines() + unswapped_names = [] + + for name in names: + if "," in name: + name = reverse_name_comma(name) + else: + # Split the name into normalized (ie. alpha-only) 'words' for comparison. We only care about ASCII, as our first-name + # list currently only contains English names. + name_words = [filter_characters(segment, non_name_characters) for segment in name.split()] + + if len(name_words) > 1 and is_first_name(name_words[-1]) and not is_first_name(name_words[0]): + # The last 'word' was in the common first names, but the first one was not. Likely swapped around. 
+ name = reverse_name(name) + + if is_role(name): + new_roles.append(name) + else: + unswapped_names.append(name) + + if len(unswapped_names) > 0: + contact['name'] = "\n".join(unswapped_names) + else: + del contact['name'] + + if 'organization' in contact: + organizations = contact['organization'].splitlines() + new_organizations = [] + + for organization in organizations: + if is_role(organization): + new_roles.append(organization) + else: + new_organizations.append(organization) + + if len(new_organizations) > 0: + contact['organization'] = "\n".join(new_organizations) + else: + del contact['organization'] + + if 'street' in contact: + streets = contact['street'].splitlines() + + if is_role(streets[0]): + new_roles.append(streets[0]) + streets = streets[1:] + + contact['street'] = "\n".join(streets) + + if 'role' in contact: + existing_roles = contact['role'].splitlines() + else: + existing_roles = [] + + if len(new_roles) > 0: + contact['role'] = "\n".join(new_roles + existing_roles) + + if "street" in contact: + lines = [x.strip() for x in contact["street"].splitlines()] + if len(lines) > 1: + if is_organization_name(lines[0], include_countries=False): + if "organization" in contact: + organizations = contact["organization"].splitlines() + else: + organizations = [] + + contact["organization"] = "\n".join([lines[0]] + organizations) + contact["street"] = "\n".join(lines[1:]) + + if 'organization' in contact: + contact['organization'] = re.sub(comma_without_space, r", \1", contact['organization']) + + for key in ("email",): + if key in contact and contact[key] is not None and (normalized == True or key in normalized): + if is_string(contact[key]): + contact[key] = contact[key].lower() + else: + contact[key] = [item.lower() for item in contact[key]] + + for key in ("street",): + if key in contact and contact[key] is not None and (normalized == True or key in normalized): + for phrase in ("GPO Box",): + regex = known_abbreviations[phrase] + contact[key] = 
re.sub(regex, phrase, contact[key]) + + for key in ("name", "street"): + if key in contact and contact[key] is not None and (normalized == True or key in normalized): + contact[key] = normalize_name(contact[key], abbreviation_threshold=3) + + for key in ("role", "city", "organization", "state", "country"): + if key in contact and contact[key] is not None and (normalized == True or key in normalized): + contact[key] = normalize_name(contact[key], abbreviation_threshold=3, length_threshold=3, + check_known_incorrect=True) + + for key in list(contact.keys()): + # Certain registries like .co.th have HTML entities in their WHOIS data... + if is_string(contact[key]): + contact[key] = contact[key].replace("<", "<") + contact[key] = contact[key].replace(">", ">") + contact[key] = contact[key].replace(" ", " ") + contact[key] = contact[key].replace("&", "&") + + contact[key] = contact[key].strip(", ") + contact[key] = re.sub(duplicate_spaces, " ", contact[key]) + + # Second deduplication pass + if key in ('organization', 'name'): + contact[key] = deduplicate(contact[key], fuzzy=True) + else: + contact[key] = deduplicate(contact[key]) + + if contact[key] == "-" or contact[key].lower() == "n/a" or contact[key].lower() == "null": + del contact[key] + + if contact is not None and len(contact) == 0: + # We don't have any actual data. + data['contacts'][contact_type] = None + return data + def deduplicate(value, fuzzy=False): - lines = value.splitlines() - unique_lines = [] - - # Filter out obviously identical lines first. - for i, line in enumerate(lines): - if line not in unique_lines: - unique_lines.append(line) - - if fuzzy == True: - # Do a fuzzy comparison to the shortest line for the remainder... 
- duplicates = get_fuzzy_duplicates(unique_lines) - - if len(duplicates) > 1: - longest_duplicate = max(duplicates, key=len) - duplicates.remove(longest_duplicate) - - for duplicate in duplicates: - unique_lines.remove(duplicate) - - return "\n".join(unique_lines) - + lines = value.splitlines() + unique_lines = [] + + # Filter out obviously identical lines first. + for i, line in enumerate(lines): + if line not in unique_lines: + unique_lines.append(line) + + if fuzzy == True: + # Do a fuzzy comparison to the shortest line for the remainder... + duplicates = get_fuzzy_duplicates(unique_lines) + + if len(duplicates) > 1: + longest_duplicate = max(duplicates, key=len) + duplicates.remove(longest_duplicate) + + for duplicate in duplicates: + unique_lines.remove(duplicate) + + return "\n".join(unique_lines) + + def is_fuzzy_duplicate(line, other_lines): - if len(other_lines) == 0: - return False - - duplicates = get_fuzzy_duplicates([line] + other_lines) - return (len(duplicates) > 1 and line in duplicates) - + if len(other_lines) == 0: + return False + + duplicates = get_fuzzy_duplicates([line] + other_lines) + return (len(duplicates) > 1 and line in duplicates) + + def get_fuzzy_duplicates(lines): - shortest = min(lines, key=len) - words = re.split(name_separators, shortest) + shortest = min(lines, key=len) + words = re.split(name_separators, shortest) + + return [line for line in lines if not fuzzy_word_match(line, words)] + - return [line for line in lines if not fuzzy_word_match(line, words)] - def fuzzy_word_match(line, words): - unique = False - for word in words: - if word.lower() not in line.lower(): - unique = True + unique = False + for word in words: + if word.lower() not in line.lower(): + unique = True + + return unique - return unique def is_organization_name(name, include_countries=True): - if is_incorporation_form(name): - return True - - # Special (common) cases - if name.lower() in ('neustar',): - return True - - if include_countries == True: - if 
is_country(name): - return True - - return False + if is_incorporation_form(name): + return True + + # Special (common) cases + if name.lower() in ('neustar',): + return True + + if include_countries == True: + if is_country(name): + return True + + return False + def is_person_name(name): - name_segments = re.split(name_separators, name) - - for segment in name_segments: - if is_first_name(segment): - return True - - return False + name_segments = re.split(name_separators, name) + + for segment in name_segments: + if is_first_name(segment): + return True + + return False + def is_first_name(name): - return (name.lower() in common_first_names) + return (name.lower() in common_first_names) + def is_abbreviation(word, abbreviation_threshold): - is_regular_abbreviation = not (len(word) >= abbreviation_threshold and "." not in word) - return (is_regular_abbreviation or is_abbreviated_incorporation_form(word) or is_country(word)) + is_regular_abbreviation = not (len(word) >= abbreviation_threshold and "." not in word) + return (is_regular_abbreviation or is_abbreviated_incorporation_form(word) or is_country(word)) + def is_domain(word): - return ("." in word and not word.endswith(".") and not word.startswith(".")) + return ("." 
in word and not word.endswith(".") and not word.startswith(".")) + def is_incorporation_form(word): - return (is_abbreviated_incorporation_form(word) or is_full_incorporation_form(word)) + return (is_abbreviated_incorporation_form(word) or is_full_incorporation_form(word)) + def is_full_incorporation_form(word): - return match_regexes(word, organization_regexes) - + return match_regexes(word, organization_regexes) + + def is_abbreviated_incorporation_form(word): - return match_regexes(word, abbreviated_organization_regexes) + return match_regexes(word, abbreviated_organization_regexes) + def is_role(line): - return match_regexes(line, role_regexes) - + return match_regexes(line, role_regexes) + + def is_country(word): - return match_regexes(word, country_regexes) + return match_regexes(word, country_regexes) + def is_known_abbreviation(word): - return match_regexes(word, known_abbreviations.values()) + return match_regexes(word, known_abbreviations.values()) + def has_country(line, country): - return country in line.lower() + return country in line.lower() + def has_incorrect_known_abbreviation(line): - for word in line.split(): - for sub, regex in known_abbreviations.items(): - if re.search(regex, word): - if sub not in word: - return True - - return False + for word in line.split(): + for sub, regex in known_abbreviations.items(): + if re.search(regex, word): + if sub not in word: + return True + + return False + # TODO: Cache/memoize lookup results? 
def get_known_abbreviation(word): - return match_regexes_dict(word, known_abbreviations) + return match_regexes_dict(word, known_abbreviations) + def match_regexes(string, regexes): - for regex in regexes: - if re.search(regex, string): - return True - - return False + for regex in regexes: + if re.search(regex, string): + return True + + return False + def match_regexes_dict(string, regexes): - for sub, regex in regexes.items(): - if re.search(regex, string): - return sub - - raise Error("No matching values.") - + for sub, regex in regexes.items(): + if re.search(regex, string): + return sub + + raise Error("No matching values.") + + def capitalize_words(line): - return ' '.join([word.capitalize() for word in line.split(" ")]) + return ' '.join([word.capitalize() for word in line.split(" ")]) + def reverse_name(name): - name_segments = re.split(name_separators, name) - name_segments.insert(0, name_segments.pop()) - return ' '.join(name_segments) + name_segments = re.split(name_separators, name) + name_segments.insert(0, name_segments.pop()) + return ' '.join(name_segments) + def reverse_name_comma(name): - name_segments = [segment.strip() for segment in name.split(",")] - first_segment = name_segments.pop() - return first_segment + " " + ', '.join(name_segments) + name_segments = [segment.strip() for segment in name.split(",")] + first_segment = name_segments.pop() + return first_segment + " " + ', '.join(name_segments) + def normalize_word(word, abbreviation_threshold=4, lowercase_domains=True, never_abbreviations=[]): - if word.lower() in never_abbreviations: - return word.capitalize() - elif is_known_abbreviation(word): - return get_known_abbreviation(word) - elif not is_abbreviation(word, abbreviation_threshold): - return word.capitalize() - elif lowercase_domains and is_domain(word): - return word.lower() - else: - # Probably an abbreviation or domain, leave it alone - return word - -def normalize_name(value, abbreviation_threshold=4, length_threshold=8, 
lowercase_domains=True, ignore_nic=False, check_known_incorrect=False): - normalized_lines = [] - for line in value.split("\n"): - line = line.strip(",") # Get rid of useless comma's - if (line.isupper() or line.islower() or (check_known_incorrect and has_incorrect_known_abbreviation(line))) and len(line) >= length_threshold: - # This line is likely not capitalized properly - if ignore_nic == True and "nic" in line.lower(): - # This is a registrar name containing 'NIC' - it should probably be all-uppercase. - line = line.upper() - else: - words = line.split() - normalized_words = [] - if len(words) >= 1: - # First word - normalized_words.append(normalize_word(words[0], abbreviation_threshold=abbreviation_threshold, lowercase_domains=lowercase_domains)) - if len(words) >= 3: - # Words between the first and last - for word in words[1:-1]: - normalized_words.append(normalize_word(word, abbreviation_threshold=abbreviation_threshold, lowercase_domains=lowercase_domains, never_abbreviations=('as',))) - if len(words) >= 2: - # Last word - normalized_words.append(normalize_word(words[-1], abbreviation_threshold=abbreviation_threshold, lowercase_domains=lowercase_domains)) - line = " ".join(normalized_words) - - # Fix country capitalization - for country in country_names: - if has_country(line, country): - line = re.sub(re.compile(country, re.IGNORECASE), capitalize_words(country), line) - - normalized_lines.append(line) - return "\n".join(normalized_lines) + if word.lower() in never_abbreviations: + return word.capitalize() + elif is_known_abbreviation(word): + return get_known_abbreviation(word) + elif not is_abbreviation(word, abbreviation_threshold): + return word.capitalize() + elif lowercase_domains and is_domain(word): + return word.lower() + else: + # Probably an abbreviation or domain, leave it alone + return word + + +def normalize_name(value, abbreviation_threshold=4, length_threshold=8, lowercase_domains=True, ignore_nic=False, + check_known_incorrect=False): + 
normalized_lines = [] + for line in value.split("\n"): + line = line.strip(",") # Get rid of useless comma's + if (line.isupper() or line.islower() or ( + check_known_incorrect and has_incorrect_known_abbreviation(line))) and len( + line) >= length_threshold: + # This line is likely not capitalized properly + if ignore_nic == True and "nic" in line.lower(): + # This is a registrar name containing 'NIC' - it should probably be all-uppercase. + line = line.upper() + else: + words = line.split() + normalized_words = [] + if len(words) >= 1: + # First word + normalized_words.append(normalize_word(words[0], abbreviation_threshold=abbreviation_threshold, + lowercase_domains=lowercase_domains)) + if len(words) >= 3: + # Words between the first and last + for word in words[1:-1]: + normalized_words.append(normalize_word(word, abbreviation_threshold=abbreviation_threshold, + lowercase_domains=lowercase_domains, + never_abbreviations=('as',))) + if len(words) >= 2: + # Last word + normalized_words.append(normalize_word(words[-1], abbreviation_threshold=abbreviation_threshold, + lowercase_domains=lowercase_domains)) + line = " ".join(normalized_words) + + # Fix country capitalization + for country in country_names: + if has_country(line, country): + line = re.sub(re.compile(country, re.IGNORECASE), capitalize_words(country), line) + + normalized_lines.append(line) + return "\n".join(normalized_lines) + def parse_dates(dates): - global grammar - parsed_dates = [] - - for date in dates: - for rule in grammar['_dateformats']: - result = re.match(rule, date) - - if result is not None: - try: - # These are always numeric. If they fail, there is no valid date present. 
- year = int(result.group("year")) - day = int(result.group("day")) - - # Detect and correct shorthand year notation - if year < 60: - year += 2000 - elif year < 100: - year += 1900 - - # This will require some more guesswork - some WHOIS servers present the name of the month - try: - month = int(result.group("month")) - except ValueError as e: - # Apparently not a number. Look up the corresponding number. - try: - month = grammar['_months'][result.group("month").lower()] - except KeyError as e: - # Unknown month name, default to 0 - month = 0 - - try: - hour = int(result.group("hour")) - except IndexError as e: - hour = 0 - except TypeError as e: - hour = 0 - - try: - minute = int(result.group("minute")) - except IndexError as e: - minute = 0 - except TypeError as e: - minute = 0 - - try: - second = int(result.group("second")) - except IndexError as e: - second = 0 - except TypeError as e: - second = 0 - - break - except ValueError as e: - # Something went horribly wrong, maybe there is no valid date present? - year = 0 - month = 0 - day = 0 - hour = 0 - minute = 0 - second = 0 - print(e.message) # FIXME: This should have proper logging of some sort...? - try: - if year > 0: - try: - parsed_dates.append(datetime.datetime(year, month, day, hour, minute, second)) - except ValueError as e: - # We might have gotten the day and month the wrong way around, let's try it the other way around - # If you're not using an ISO-standard date format, you're an evil registrar! - parsed_dates.append(datetime.datetime(year, day, month, hour, minute, second)) - except UnboundLocalError as e: - pass - - if len(parsed_dates) > 0: - return parsed_dates - else: - return None + global grammar + parsed_dates = [] + + for date in dates: + for rule in grammar['_dateformats']: + result = re.match(rule, date) + + if result is not None: + try: + # These are always numeric. If they fail, there is no valid date present. 
+ year = int(result.group("year")) + day = int(result.group("day")) + + # Detect and correct shorthand year notation + if year < 60: + year += 2000 + elif year < 100: + year += 1900 + + # This will require some more guesswork - some WHOIS servers present the name of the month + try: + month = int(result.group("month")) + except ValueError as e: + # Apparently not a number. Look up the corresponding number. + try: + month = grammar['_months'][result.group("month").lower()] + except KeyError as e: + # Unknown month name, default to 0 + month = 0 + + try: + hour = int(result.group("hour")) + except IndexError as e: + hour = 0 + except TypeError as e: + hour = 0 + + try: + minute = int(result.group("minute")) + except IndexError as e: + minute = 0 + except TypeError as e: + minute = 0 + + try: + second = int(result.group("second")) + except IndexError as e: + second = 0 + except TypeError as e: + second = 0 + + break + except ValueError as e: + # Something went horribly wrong, maybe there is no valid date present? + year = 0 + month = 0 + day = 0 + hour = 0 + minute = 0 + second = 0 + print(e.message) # FIXME: This should have proper logging of some sort...? + try: + if year > 0: + try: + parsed_dates.append(datetime.datetime(year, month, day, hour, minute, second)) + except ValueError as e: + # We might have gotten the day and month the wrong way around, let's try it the other way around + # If you're not using an ISO-standard date format, you're an evil registrar! 
+ parsed_dates.append(datetime.datetime(year, day, month, hour, minute, second)) + except UnboundLocalError as e: + pass + + if len(parsed_dates) > 0: + return parsed_dates + else: + return None + def remove_duplicates(data): - cleaned_list = [] + cleaned_list = [] - for entry in data: - if entry not in cleaned_list: - cleaned_list.append(entry) + for entry in data: + if entry not in cleaned_list: + cleaned_list.append(entry) - return cleaned_list + return cleaned_list -def remove_suffixes(data): - # Removes everything before and after the first non-whitespace continuous string. - # Used to get rid of IP suffixes for nameservers. - cleaned_list = [] - for entry in data: - cleaned_list.append(re.search("([^\s]+)\s*[\s]*", entry).group(1).lstrip()) +def remove_suffixes(data): + # Removes everything before and after the first non-whitespace continuous string. + # Used to get rid of IP suffixes for nameservers. + cleaned_list = [] + + for entry in data: + cleaned_list.append(re.search("([^\s]+)\s*[\s]*", entry).group(1).lstrip()) + + return cleaned_list + + +def fallback_extraction(data, contact, regexes): + """ + Try to use the fallback rules to extract any missed data + :param data: The array containing the data + :param contact: The contact to check + :param regexes: The regexes to use + :return: The new contact if the result is not None, otherwise None + """ + if contact is None: + temp_contact = {} + else: + temp_contact = copy.deepcopy(contact) + + for segment in data: + get_contact_field_from_response(segment, "handle", temp_contact, regexes) + get_contact_field_from_response(segment, "name", temp_contact, regexes) + get_contact_field_from_response(segment, "organization", temp_contact, regexes) + get_contact_field_from_response(segment, "street", temp_contact, regexes) + get_contact_field_from_response(segment, "city", temp_contact, regexes) + get_contact_field_from_response(segment, "state", temp_contact, regexes) + get_contact_field_from_response(segment, 
"postalcode", temp_contact, regexes) + get_contact_field_from_response(segment, "country", temp_contact, regexes) + get_contact_field_from_response(segment, "phone", temp_contact, regexes) + get_contact_field_from_response(segment, "fax", temp_contact, regexes) + get_contact_field_from_response(segment, "email", temp_contact, regexes) + + if temp_contact: + return temp_contact + else: + return None + + +def get_contact_field_from_response(segment, field, contact, regexes): + """ + Check if a contact contains a given field and if it does not, + see if it can be extracted from the WHOIS response + :param segment: The WHOIS response to check + :param field: The field to check for + :param contact: The contact to verify + :param regexes: The set of regexes to use + :return Does not return anything, but modifies the contact + """ + if field not in contact: + match = re.search(regexes.get(field), segment, re.IGNORECASE) + if match is not None: + contact.update(match.groupdict()) - return cleaned_list def parse_registrants(data, never_query_handles=True, handle_server=""): - registrant = None - tech_contact = None - billing_contact = None - admin_contact = None - - for segment in data: - for regex in registrant_regexes: - match = re.search(regex, segment) - if match is not None: - registrant = match.groupdict() - break - - for segment in data: - for regex in tech_contact_regexes: - match = re.search(regex, segment) - if match is not None: - tech_contact = match.groupdict() - break - - for segment in data: - for regex in admin_contact_regexes: - match = re.search(regex, segment) - if match is not None: - admin_contact = match.groupdict() - break - - for segment in data: - for regex in billing_contact_regexes: - match = re.search(regex, segment) - if match is not None: - billing_contact = match.groupdict() - break - - # Find NIC handle contact definitions - handle_contacts = parse_nic_contact(data) - - # Find NIC handle references and process them - missing_handle_contacts 
= [] - for category in nic_contact_references: - for regex in nic_contact_references[category]: - for segment in data: - match = re.search(regex, segment) - if match is not None: - data_reference = match.groupdict() - if data_reference["handle"] == "-" or re.match("https?:\/\/", data_reference["handle"]) is not None: - pass # Reference was either blank or a URL; the latter is to deal with false positives for nic.ru - else: - found = False - for contact in handle_contacts: - if contact["handle"] == data_reference["handle"]: - found = True - data_reference.update(contact) - if found == False: - # The contact definition was not found in the supplied raw WHOIS data. If the - # method has been called with never_query_handles=False, we can use the supplied - # WHOIS server for looking up the handle information separately. - if never_query_handles == False: - try: - contact = fetch_nic_contact(data_reference["handle"], handle_server) - data_reference.update(contact) - except shared.WhoisException as e: - pass # No data found. TODO: Log error? - else: - pass # TODO: Log warning? - if category == "registrant": - registrant = data_reference - elif category == "tech": - tech_contact = data_reference - elif category == "billing": - billing_contact = data_reference - elif category == "admin": - admin_contact = data_reference - break - - # Post-processing - for obj in (registrant, tech_contact, billing_contact, admin_contact): - if obj is not None: - for key in list(obj.keys()): - if obj[key] is None or obj[key].strip() == "": # Just chomp all surrounding whitespace - del obj[key] - else: - obj[key] = obj[key].strip() - if "phone_ext" in obj: - if "phone" in obj: - obj["phone"] += " ext. 
%s" % obj["phone_ext"] - del obj["phone_ext"] - if "street1" in obj: - street_items = [] - i = 1 - while True: - try: - street_items.append(obj["street%d" % i]) - del obj["street%d" % i] - except KeyError as e: - break - i += 1 - obj["street"] = "\n".join(street_items) - if "organization1" in obj: # This is to deal with eg. HKDNR, who allow organization names in multiple languages. - organization_items = [] - i = 1 - while True: - try: - if obj["organization%d" % i].strip() != "": - organization_items.append(obj["organization%d" % i]) - del obj["organization%d" % i] - except KeyError as e: - break - i += 1 - obj["organization"] = "\n".join(organization_items) - if 'changedate' in obj: - obj['changedate'] = parse_dates([obj['changedate']])[0] - if 'creationdate' in obj: - obj['creationdate'] = parse_dates([obj['creationdate']])[0] - if 'street' in obj and "\n" in obj["street"] and 'postalcode' not in obj: - # Deal with certain mad WHOIS servers that don't properly delimit address data... (yes, AFNIC, looking at you) - lines = [x.strip() for x in obj["street"].splitlines()] - if " " in lines[-1]: - postal_code, city = lines[-1].split(" ", 1) - if "." not in lines[-1] and re.match("[0-9]", postal_code) and len(postal_code) >= 3: - obj["postalcode"] = postal_code - obj["city"] = city - obj["street"] = "\n".join(lines[:-1]) - if 'firstname' in obj or 'lastname' in obj: - elements = [] - if 'firstname' in obj: - elements.append(obj["firstname"]) - if 'lastname' in obj: - elements.append(obj["lastname"]) - obj["name"] = " ".join(elements) - if 'country' in obj and 'city' in obj and (re.match("^R\.?O\.?C\.?$", obj["country"], re.IGNORECASE) or obj["country"].lower() == "republic of china") and obj["city"].lower() == "taiwan": - # There's an edge case where some registrants append ", Republic of China" after "Taiwan", and this is mis-parsed - # as Taiwan being the city. This is meant to correct that. 
- obj["country"] = "%s, %s" % (obj["city"], obj["country"]) - lines = [x.strip() for x in obj["street"].splitlines()] - obj["city"] = lines[-1] - obj["street"] = "\n".join(lines[:-1]) - - return { - "registrant": registrant, - "tech": tech_contact, - "admin": admin_contact, - "billing": billing_contact, - } + registrant = None + tech_contact = None + billing_contact = None + admin_contact = None + + for segment in data: + for regex in registrant_regexes: + match = re.search(regex, segment) + if match is not None: + registrant = match.groupdict() + break + + for segment in data: + for regex in tech_contact_regexes: + match = re.search(regex, segment) + if match is not None: + tech_contact = match.groupdict() + break + + for segment in data: + for regex in admin_contact_regexes: + match = re.search(regex, segment) + if match is not None: + admin_contact = match.groupdict() + break + + for segment in data: + for regex in billing_contact_regexes: + match = re.search(regex, segment) + if match is not None: + billing_contact = match.groupdict() + break + + registrant = fallback_extraction(data, registrant, registrant_fallback_regexes) + tech_contact = fallback_extraction(data, tech_contact, tech_fallback_regexes) + admin_contact = fallback_extraction(data, admin_contact, admin_fallback_regexes) + billing_contact = fallback_extraction(data, billing_contact, billing_fallback_regexes) + + # Find NIC handle contact definitions + handle_contacts = parse_nic_contact(data) + + # Find NIC handle references and process them + missing_handle_contacts = [] + for category in nic_contact_references: + for regex in nic_contact_references[category]: + for segment in data: + match = re.search(regex, segment) + if match is not None: + data_reference = match.groupdict() + if data_reference["handle"] == "-" or re.match("https?:\/\/", data_reference["handle"]) is not None: + pass # Reference was either blank or a URL; the latter is to deal with false positives for nic.ru + else: + found = 
False + for contact in handle_contacts: + if contact["handle"] == data_reference["handle"]: + found = True + data_reference.update(contact) + if found == False: + # The contact definition was not found in the supplied raw WHOIS data. If the + # method has been called with never_query_handles=False, we can use the supplied + # WHOIS server for looking up the handle information separately. + if never_query_handles == False: + try: + contact = fetch_nic_contact(data_reference["handle"], handle_server) + data_reference.update(contact) + except shared.WhoisException as e: + pass # No data found. TODO: Log error? + else: + pass # TODO: Log warning? + if category == "registrant": + registrant = data_reference + elif category == "tech": + tech_contact = data_reference + elif category == "billing": + billing_contact = data_reference + elif category == "admin": + admin_contact = data_reference + break + + # Post-processing + for obj in (registrant, tech_contact, billing_contact, admin_contact): + if obj is not None: + for key in list(obj.keys()): + if obj[key] is None or obj[key].strip() == "": # Just chomp all surrounding whitespace + del obj[key] + else: + obj[key] = obj[key].strip() + if "phone_ext" in obj: + if "phone" in obj: + obj["phone"] += " ext. %s" % obj["phone_ext"] + del obj["phone_ext"] + if "street1" in obj: + street_items = [] + i = 1 + while True: + try: + street_items.append(obj["street%d" % i]) + del obj["street%d" % i] + except KeyError as e: + break + i += 1 + obj["street"] = "\n".join(street_items) + if "organization1" in obj: # This is to deal with eg. HKDNR, who allow organization names in multiple languages. 
+ organization_items = [] + i = 1 + while True: + try: + if obj["organization%d" % i].strip() != "": + organization_items.append(obj["organization%d" % i]) + del obj["organization%d" % i] + except KeyError as e: + break + i += 1 + obj["organization"] = "\n".join(organization_items) + if 'changedate' in obj: + obj['changedate'] = parse_dates([obj['changedate']])[0] + if 'creationdate' in obj: + obj['creationdate'] = parse_dates([obj['creationdate']])[0] + if 'street' in obj and "\n" in obj["street"] and 'postalcode' not in obj: + # Deal with certain mad WHOIS servers that don't properly delimit address data... (yes, AFNIC, looking at you) + lines = [x.strip() for x in obj["street"].splitlines()] + if " " in lines[-1]: + postal_code, city = lines[-1].split(" ", 1) + if "." not in lines[-1] and re.match("[0-9]", postal_code) and len(postal_code) >= 3: + obj["postalcode"] = postal_code + obj["city"] = city + obj["street"] = "\n".join(lines[:-1]) + if 'firstname' in obj or 'lastname' in obj: + elements = [] + if 'firstname' in obj: + elements.append(obj["firstname"]) + if 'lastname' in obj: + elements.append(obj["lastname"]) + obj["name"] = " ".join(elements) + if 'country' in obj and 'city' in obj and (re.match("^R\.?O\.?C\.?$", obj["country"], re.IGNORECASE) or obj[ + "country"].lower() == "republic of china") and obj["city"].lower() == "taiwan": + # There's an edge case where some registrants append ", Republic of China" after "Taiwan", and this is mis-parsed + # as Taiwan being the city. This is meant to correct that. 
+ obj["country"] = "%s, %s" % (obj["city"], obj["country"]) + lines = [x.strip() for x in obj["street"].splitlines()] + obj["city"] = lines[-1] + obj["street"] = "\n".join(lines[:-1]) + + return { + "registrant": registrant, + "tech": tech_contact, + "admin": admin_contact, + "billing": billing_contact, + } + def fetch_nic_contact(handle, lookup_server): - response = net.get_whois_raw(handle, lookup_server) - response = [segment.replace("\r", "") for segment in response] # Carriage returns are the devil - results = parse_nic_contact(response) + response = net.get_whois_raw(handle, lookup_server) + response = [segment.replace("\r", "") for segment in response] # Carriage returns are the devil + results = parse_nic_contact(response) + + if len(results) > 0: + return results[0] + else: + raise shared.WhoisException("No contact data found in the response.") - if len(results) > 0: - return results[0] - else: - raise shared.WhoisException("No contact data found in the response.") def parse_nic_contact(data): - handle_contacts = [] - for regex in nic_contact_regexes: - for segment in data: - matches = re.finditer(regex, segment) - for match in matches: - handle_contacts.append(match.groupdict()) - - return handle_contacts + handle_contacts = [] + for regex in nic_contact_regexes: + for segment in data: + matches = re.finditer(regex, segment) + for match in matches: + handle_contacts.append(match.groupdict()) + + return handle_contacts From 90c5f0a3f4e288c80a3ef85f03ef4480e6346057 Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Mon, 6 Jun 2016 10:22:31 +0200 Subject: [PATCH 02/40] FIX: Registrar key not existing (resulted in failure for .eu domains) FIX: Tabs to spaces (PEP-8) ADD: Tests ADD: Test output. Shows failed tests because of more extracted data. 
--- pwhois | 214 +++++++------ test.py | 431 ++++++++++++++------------ test/data/google.durban | 85 +++++ test/data/google.eu | 66 ++++ test/data/google.luxury | 68 ++++ test/data/noir.haus | 126 ++++++++ test/target_default/google.durban | 1 + test/target_default/google.eu | 1 + test/target_default/google.luxury | 1 + test/target_default/noir.haus | 1 + test/target_normalized/google.durban | 1 + test/target_normalized/google.eu | 1 + test/target_normalized/google.luxury | 1 + test/target_normalized/noir.haus | 1 + test_output.txt | 447 +++++++++++++++++++++++++++ 15 files changed, 1143 insertions(+), 302 deletions(-) create mode 100644 test/data/google.durban create mode 100644 test/data/google.eu create mode 100644 test/data/google.luxury create mode 100644 test/data/noir.haus create mode 100644 test/target_default/google.durban create mode 100644 test/target_default/google.eu create mode 100644 test/target_default/google.luxury create mode 100644 test/target_default/noir.haus create mode 100644 test/target_normalized/google.durban create mode 100644 test/target_normalized/google.eu create mode 100644 test/target_normalized/google.luxury create mode 100644 test/target_normalized/noir.haus create mode 100644 test_output.txt diff --git a/pwhois b/pwhois index cf0f19b..8f92fd3 100755 --- a/pwhois +++ b/pwhois @@ -1,116 +1,130 @@ #!/usr/bin/env python2 -import argparse, pythonwhois, json, datetime, sys +import argparse +import datetime +import json +import sys + +import pythonwhois + try: - from collections import OrderedDict + from collections import OrderedDict except ImportError as e: - from ordereddict import OrderedDict + from ordereddict import OrderedDict parser = argparse.ArgumentParser(description="Retrieves and parses WHOIS data for a domain name.") -parser.add_argument("-r", "--raw", action="store_true", help="Outputs raw WHOIS data and doesn't attempt to parse it. 
Segments are separated by a double-dash (--).") -parser.add_argument("-j", "--json", action="store_true", help="Outputs structured WHOIS data in JSON format, according to the pythonwhois API.") -parser.add_argument("-f", "--file", action="store", help="Loads and parses raw double-dash-delimited WHOIS data from a specified file, instead of contacting a WHOIS server.", default=None) +parser.add_argument("-r", "--raw", action="store_true", + help="Outputs raw WHOIS data and doesn't attempt to parse it. Segments are separated by a double-dash (--).") +parser.add_argument("-j", "--json", action="store_true", + help="Outputs structured WHOIS data in JSON format, according to the pythonwhois API.") +parser.add_argument("-f", "--file", action="store", + help="Loads and parses raw double-dash-delimited WHOIS data from a specified file, instead of contacting a WHOIS server.", + default=None) parser.add_argument("domain", nargs=1) args = parser.parse_args() + def json_fallback(obj): - if isinstance(obj, datetime.datetime): - return obj.isoformat() - else: - return obj + if isinstance(obj, datetime.datetime): + return obj.isoformat() + else: + return obj + if args.file is None: - data, server_list = pythonwhois.net.get_whois_raw(args.domain[0], with_server_list=True) + data, server_list = pythonwhois.net.get_whois_raw(args.domain[0], with_server_list=True) else: - server_list = [] - with open(args.file, "r") as f: - data = f.read().split("\n--\n") + server_list = [] + with open(args.file, "r") as f: + data = f.read().split("\n--\n") if args.raw == True: - print("\n--\n".join([x.encode("utf-8") for x in data])) + print("\n--\n".join([x.encode("utf-8") for x in data])) else: - if len(server_list) > 0: - parsed = pythonwhois.parse.parse_raw_whois(data, normalized=True, never_query_handles=False, handle_server=server_list[-1]) - else: - parsed = pythonwhois.parse.parse_raw_whois(data, normalized=True) - - if args.json == True: - print(json.dumps(parsed, default=json_fallback)) - 
else: - data_map = OrderedDict({}) - - # This defines the fields shown in the output - data_map["id"] = ("Domain ID", 1) - data_map["status"] = ("Status", 1) - data_map["registrar"] = ("Registrar", 1) - data_map["creation_date"] = ("Registration date", 1) - data_map["expiration_date"] = ("Expiration date", 1) - data_map["updated_date"] = ("Last update", 1) - data_map["nameservers"] = ("Name server", "+") - data_map["emails"] = ("E-mail address", "+") - - widest_label = 0 - for key, value in data_map.items(): - if len(value[0]) > widest_label: - widest_label = len(value[0]) - - for key, value in data_map.items(): - if key in parsed and parsed[key] is not None: - label = value[0] + (" " * (widest_label - len(value[0]))) + " :" - if value[1] == 1: - print("%s %s" % (label, parsed[key][0])) - elif value[1] == "+": - for item in parsed[key]: - print("%s %s" % (label, item)) - - if parsed["contacts"] is not None: - # This defines the contacts shown in the output - contacts_map = OrderedDict({}) - contacts_map["registrant"] ="Registrant" - contacts_map["tech"] = "Technical Contact" - contacts_map["admin"] = "Administrative Contact" - contacts_map["billing"] = "Billing Contact" - - # This defines the contact data shown in the output - data_map = OrderedDict({}) - data_map["handle"] ="NIC handle" - data_map["name"] ="Name" - data_map["role"] ="Role" - data_map["organization"] = "Organization" - data_map["street"] = "Street address" - data_map["postalcode"] = "Postal code" - data_map["city"] = "City" - data_map["state"] = "State / Province" - data_map["country"] = "Country" - data_map["email"] = "E-mail address" - data_map["phone"] = "Phone number" - data_map["fax"] = "Fax number" - data_map["creationdate"] = "Created" - data_map["changedate"] = "Last changed" - - for contact in contacts_map: - if parsed["contacts"][contact] is not None: - contact_data = parsed["contacts"][contact] - - print("\n" + contacts_map[contact]) - - for key, value in data_map.items(): - if 
len(value) > widest_label: - widest_label = len(value) - - for key, value in data_map.items(): - if key in contact_data and contact_data[key] is not None: - label = " " + value + (" " * (widest_label - len(value))) + " :" - if sys.version_info < (3, 0): - if type(contact_data[key]) == str: - actual_data = contact_data[key].decode("utf-8") - elif type(contact_data[key]) == datetime.datetime: - actual_data = unicode(contact_data[key]) - else: - actual_data = contact_data[key] - else: - actual_data = str(contact_data[key]) - if "\n" in actual_data: # Indent multi-line values properly - lines = actual_data.split("\n") - actual_data = "\n".join([lines[0]] + [(" " * (widest_label + 7)) + line for line in lines[1:]]) - print("%s %s" % (label, actual_data)) + if len(server_list) > 0: + parsed = pythonwhois.parse.parse_raw_whois(data, normalized=True, never_query_handles=False, + handle_server=server_list[-1]) + else: + parsed = pythonwhois.parse.parse_raw_whois(data, normalized=True) + + if args.json == True: + print(json.dumps(parsed, default=json_fallback)) + else: + data_map = OrderedDict({}) + + # This defines the fields shown in the output + data_map["id"] = ("Domain ID", 1) + data_map["status"] = ("Status", 1) + data_map["registrar"] = ("Registrar", 1) + data_map["creation_date"] = ("Registration date", 1) + data_map["expiration_date"] = ("Expiration date", 1) + data_map["updated_date"] = ("Last update", 1) + data_map["nameservers"] = ("Name server", "+") + data_map["emails"] = ("E-mail address", "+") + + widest_label = 0 + for key, value in data_map.items(): + if len(value[0]) > widest_label: + widest_label = len(value[0]) + + for key, value in data_map.items(): + if key in parsed and parsed[key] is not None: + label = value[0] + (" " * (widest_label - len(value[0]))) + " :" + if value[1] == 1: + print("%s %s" % (label, parsed[key][0])) + elif value[1] == "+": + for item in parsed[key]: + print("%s %s" % (label, item)) + + if parsed["contacts"] is not None: + # This 
defines the contacts shown in the output + contacts_map = OrderedDict({}) + contacts_map["registrant"] = "Registrant" + contacts_map["tech"] = "Technical Contact" + contacts_map["admin"] = "Administrative Contact" + contacts_map["billing"] = "Billing Contact" + + # This defines the contact data shown in the output + data_map = OrderedDict({}) + data_map["handle"] = "NIC handle" + data_map["name"] = "Name" + data_map["role"] = "Role" + data_map["organization"] = "Organization" + data_map["street"] = "Street address" + data_map["postalcode"] = "Postal code" + data_map["city"] = "City" + data_map["state"] = "State / Province" + data_map["country"] = "Country" + data_map["email"] = "E-mail address" + data_map["phone"] = "Phone number" + data_map["fax"] = "Fax number" + data_map["creationdate"] = "Created" + data_map["changedate"] = "Last changed" + + for contact in contacts_map: + if parsed["contacts"][contact] is not None: + contact_data = parsed["contacts"][contact] + + print("\n" + contacts_map[contact]) + + for key, value in data_map.items(): + if len(value) > widest_label: + widest_label = len(value) + + for key, value in data_map.items(): + if key in contact_data and contact_data[key] is not None: + label = " " + value + (" " * (widest_label - len(value))) + " :" + if sys.version_info < (3, 0): + if type(contact_data[key]) == str: + actual_data = contact_data[key].decode("utf-8") + elif type(contact_data[key]) == datetime.datetime: + actual_data = unicode(contact_data[key]) + else: + actual_data = contact_data[key] + else: + actual_data = str(contact_data[key]) + if "\n" in actual_data: # Indent multi-line values properly + lines = actual_data.split("\n") + actual_data = "\n".join( + [lines[0]] + [(" " * (widest_label + 7)) + line for line in lines[1:]]) + print("%s %s" % (label, actual_data)) diff --git a/test.py b/test.py index a26b355..ae731d1 100755 --- a/test.py +++ b/test.py @@ -1,244 +1,271 @@ #!/usr/bin/env python2 -import sys, argparse, os, pythonwhois, 
json, datetime, codecs, time -import pkgutil +import argparse +import codecs +import datetime import encodings +import json +import os +import pkgutil +import sys +import time + +import pythonwhois unicode_stdout = codecs.getwriter(sys.stdout.encoding)(sys.stdout) unicode_stderr = codecs.getwriter(sys.stderr.encoding)(sys.stderr) if sys.version_info < (3, 0): - def is_string(data): - """Test for string with support for python 2.""" - return isinstance(data, basestring) + def is_string(data): + """Test for string with support for python 2.""" + return isinstance(data, basestring) else: - def is_string(data): - """Test for string with support for python 3.""" - return isinstance(data, str) + def is_string(data): + """Test for string with support for python 3.""" + return isinstance(data, str) + # FIXME: The testing script is currently incapable of testing referenced NIC handles that are # retrieved separately, such as is the case with the JPRS registry for .co.jp. This # really needs to be fixed, to ensure that contact parsing for this doesn't break. def get_codecs(): - """Dynamically get list of codecs in python.""" - false_positives = set(["aliases"]) - found = set(name for imp, name, ispkg in pkgutil.iter_modules(encodings.__path__) if not ispkg) - found.difference_update(false_positives) - return found + """Dynamically get list of codecs in python.""" + false_positives = set(["aliases"]) + found = set(name for imp, name, ispkg in pkgutil.iter_modules(encodings.__path__) if not ispkg) + found.difference_update(false_positives) + return found def read_encoded_file(file_path): - """Try reading file using all codecs. Return the first succesfull one.""" - for encoding in get_codecs(): - try: - with codecs.open(file_path, "r", encoding) as f: - return f.read() - except Exception: - pass + """Try reading file using all codecs. 
Return the first succesfull one.""" + for encoding in get_codecs(): + try: + with codecs.open(file_path, "r", encoding) as f: + return f.read() + except Exception: + pass + parser = argparse.ArgumentParser(description="Runs or modifies the test suite for python-whois.") -parser.add_argument("mode", nargs=1, choices=["run", "update"], default="run", help="Whether to run or update the tests. Only update if you know what you're doing!") -parser.add_argument("target", nargs="+", help="The targets to run/modify tests for. Use 'all' to run the full test suite.") +parser.add_argument("mode", nargs=1, choices=["run", "update"], default="run", + help="Whether to run or update the tests. Only update if you know what you're doing!") +parser.add_argument("target", nargs="+", + help="The targets to run/modify tests for. Use 'all' to run the full test suite.") args = parser.parse_args() OK = '\033[92m' FAIL = '\033[91m' ENDC = '\033[0m' + def encoded_json_dumps(obj): - try: - return json.dumps(obj, default=json_fallback) - except UnicodeDecodeError as e: - return json.dumps(recursive_encode(obj, "latin-1"), default=json_fallback) + try: + return json.dumps(obj, default=json_fallback) + except UnicodeDecodeError as e: + return json.dumps(recursive_encode(obj, "latin-1"), default=json_fallback) + def json_fallback(obj): - if isinstance(obj, datetime.datetime): - return obj.isoformat() - elif is_string(obj): - return indent_values(obj) - else: - return obj + if isinstance(obj, datetime.datetime): + return obj.isoformat() + elif is_string(obj): + return indent_values(obj) + else: + return obj + def indent_values(string): - return string.replace("\n", "\n ") - + return string.replace("\n", "\n ") + + def recursive_encode(obj, encoding): - for key in list(obj.keys()): - if isinstance(obj[key], dict): - obj[key] = recursive_encode(obj[key], encoding) - elif isinstance(obj[key], list): - obj[key] = [x.decode(encoding) for x in obj[key]] - else: - try: - obj[key] = 
obj[key].decode(encoding) - except: - pass - return obj + for key in list(obj.keys()): + if isinstance(obj[key], dict): + obj[key] = recursive_encode(obj[key], encoding) + elif isinstance(obj[key], list): + obj[key] = [x.decode(encoding) for x in obj[key]] + else: + try: + obj[key] = obj[key].decode(encoding) + except: + pass + return obj + def recursive_compare(obj1, obj2, chain=[]): - errors = [] - chain_name = " -> ".join(chain) - s1 = set(obj1.keys()) - s2 = set(obj2.keys()) - - for key in s1.difference(s2): - value = json_fallback(obj1[key]) - errors.append("(%s) Key present in previous data, but missing in current data: `%s`\n [---] %s" % (chain_name, key, value)) - - for key in s2.difference(s1): - value = json_fallback(obj2[key]) - errors.append("(%s) New key present in current data, but missing in previous data: `%s`\n [+++] %s" % (chain_name, key, value)) - - for key in s1.intersection(s2): - if isinstance(obj1[key], dict) and isinstance(obj2[key], dict): - errors += recursive_compare(obj1[key], obj2[key], chain + [key]) - elif isinstance(obj1[key], list) and isinstance(obj2[key], list): - lst1 = [json_fallback(x) for x in obj1[key]] - lst2 = [json_fallback(x) for x in obj2[key]] - if set(lst1) != set(lst2): - errors.append("(%s) List mismatch in key `%s`.\n [old] %s\n [new] %s" % (chain_name, key, set(lst1), set(lst2))) - else: - if json_fallback(obj1[key]) != json_fallback(obj2[key]): - errors.append("(%s) Data mismatch in key `%s`.\n [old] %s\n [new] %s" % (chain_name, key, json_fallback(obj1[key]), json_fallback(obj2[key]))) - - return errors + errors = [] + chain_name = " -> ".join(chain) + s1 = set(obj1.keys()) + s2 = set(obj2.keys()) + + for key in s1.difference(s2): + value = json_fallback(obj1[key]) + errors.append("(%s) Key present in previous data, but missing in current data: `%s`\n [---] %s" % ( + chain_name, key, value)) + + for key in s2.difference(s1): + value = json_fallback(obj2[key]) + errors.append("(%s) New key present in current 
data, but missing in previous data: `%s`\n [+++] %s" % ( + chain_name, key, value)) + + for key in s1.intersection(s2): + if isinstance(obj1[key], dict) and isinstance(obj2[key], dict): + errors += recursive_compare(obj1[key], obj2[key], chain + [key]) + elif isinstance(obj1[key], list) and isinstance(obj2[key], list): + lst1 = [json_fallback(x) for x in obj1[key]] + lst2 = [json_fallback(x) for x in obj2[key]] + if set(lst1) != set(lst2): + errors.append("(%s) List mismatch in key `%s`.\n [old] %s\n [new] %s" % ( + chain_name, key, set(lst1), set(lst2))) + else: + if json_fallback(obj1[key]) != json_fallback(obj2[key]): + errors.append("(%s) Data mismatch in key `%s`.\n [old] %s\n [new] %s" % ( + chain_name, key, json_fallback(obj1[key]), json_fallback(obj2[key]))) + + return errors + if "all" in args.target: - targets = os.listdir("test/data") + targets = os.listdir("test/data") else: - targets = args.target + targets = args.target targets.sort() if args.mode[0] == "run": - times_default = [] - times_normalized = [] - errors = False - suites = [] - for target in targets: - try: - with codecs.open(os.path.join("test/data", target), "r") as f: - data = f.read().split("\n--\n") - except IOError as e: - sys.stderr.write("Invalid domain %(domain)s specified. No test case or base data exists.\n" % {"domain": target}) - errors = True - continue - except UnicodeDecodeError: - try: - # Try cp1252 (ufpa.br uses that) - with codecs.open(os.path.join("test/data", target), "r", 'cp1252') as f: - data = f.read().split("\n--\n") - except UnicodeDecodeError as e: - # Fall back to trying all registered codecs - data = read_encoded_file(os.path.join("test/data", target)).split("\n--\n") - try: - with codecs.open(os.path.join("test/target_default", target), "r") as f: - default = f.read() - with codecs.open(os.path.join("test/target_normalized", target), "r") as f: - normalized = f.read() - except IOError as e: - sys.stderr.write("Missing target data for domain %(domain)s. 
Run `./test.py update %(domain)s` to correct this, after verifying that pythonwhois can correctly parse this particular domain.\n" % {"domain": target}) - errors = True - continue - - suites.append((target, data, default, normalized)) - - if errors: - exit(1) - - total_errors = 0 - total_failed = 0 - total_passed = 0 - done = 1 - total = len(suites) * 2 - for target, data, target_default, target_normalized in suites: - for normalization in (True, []): - start_time = time.time() - parsed = pythonwhois.parse.parse_raw_whois(data, normalized=normalization) - time_taken = (time.time() - start_time) * 1000 # in ms - parsed = json.loads(encoded_json_dumps(parsed)) # Stupid Unicode hack - - if normalization == True: - target_data = json.loads(target_normalized) - else: - target_data = json.loads(target_default) - - errors = recursive_compare(target_data, parsed, chain=["root"]) - - if normalization == True: - mode ="normalized" - else: - mode ="default" - - progress_prefix = "[%s/%s] " % (str(done).rjust(len(str(total))), str(total).rjust(len(str(total)))) - - if len(errors) == 0: - sys.stdout.write(OK) - sys.stdout.write(progress_prefix + "%s passed in %s mode.\n" % (target, mode)) - sys.stderr.write(ENDC) - if normalization == True: - times_normalized.append(time_taken) - else: - times_default.append(time_taken) - total_passed += 1 - else: - sys.stderr.write(FAIL) - sys.stderr.write(progress_prefix + "%s TEST CASE FAILED, ERRORS BELOW\n" % target) - sys.stderr.write("Mode: %s\n" % mode) - sys.stderr.write("=======================================\n") - for error in errors: - unicode_stderr.write(error + "\n") - sys.stderr.write("=======================================\n") - sys.stderr.write(ENDC) - total_errors += len(errors) - total_failed += 1 - done += 1 - - if len(times_default) > 0: - average_default = int(sum(times_default) / float(len(times_default))) - min_default = min(times_default) - max_default = max(times_default) - sys.stdout.write("Timing in default mode: 
%dms avg, %dms min, %dms max\n" % (average_default, min_default, max_default)) - - if len(times_normalized) > 0: - average_normalized = int(sum(times_normalized) / float(len(times_normalized))) - min_normalized = min(times_normalized) - max_normalized = max(times_normalized) - sys.stdout.write("Timing in normalized mode: %dms avg, %dms min, %dms max\n" % (average_normalized, min_normalized, max_normalized)) - - if total_failed == 0: - sys.stdout.write(OK) - sys.stdout.write("All tests passed!\n") - sys.stderr.write(ENDC) - else: - sys.stdout.write(FAIL) - sys.stdout.write("%d tests failed, %d errors in total.\n" % (total_failed, total_errors)) - sys.stderr.write(ENDC) - exit(1) - - + times_default = [] + times_normalized = [] + errors = False + suites = [] + for target in targets: + try: + with codecs.open(os.path.join("test/data", target), "r") as f: + data = f.read().split("\n--\n") + except IOError as e: + sys.stderr.write( + "Invalid domain %(domain)s specified. No test case or base data exists.\n" % {"domain": target}) + errors = True + continue + except UnicodeDecodeError: + try: + # Try cp1252 (ufpa.br uses that) + with codecs.open(os.path.join("test/data", target), "r", 'cp1252') as f: + data = f.read().split("\n--\n") + except UnicodeDecodeError as e: + # Fall back to trying all registered codecs + data = read_encoded_file(os.path.join("test/data", target)).split("\n--\n") + try: + with codecs.open(os.path.join("test/target_default", target), "r") as f: + default = f.read() + with codecs.open(os.path.join("test/target_normalized", target), "r") as f: + normalized = f.read() + except IOError as e: + sys.stderr.write( + "Missing target data for domain %(domain)s. 
Run `./test.py update %(domain)s` to correct this, after verifying that pythonwhois can correctly parse this particular domain.\n" % { + "domain": target}) + errors = True + continue + + suites.append((target, data, default, normalized)) + + if errors: + exit(1) + + total_errors = 0 + total_failed = 0 + total_passed = 0 + done = 1 + total = len(suites) * 2 + for target, data, target_default, target_normalized in suites: + for normalization in (True, []): + start_time = time.time() + parsed = pythonwhois.parse.parse_raw_whois(data, normalized=normalization) + time_taken = (time.time() - start_time) * 1000 # in ms + parsed = json.loads(encoded_json_dumps(parsed)) # Stupid Unicode hack + + if normalization == True: + target_data = json.loads(target_normalized) + else: + target_data = json.loads(target_default) + + errors = recursive_compare(target_data, parsed, chain=["root"]) + + if normalization == True: + mode = "normalized" + else: + mode = "default" + + progress_prefix = "[%s/%s] " % (str(done).rjust(len(str(total))), str(total).rjust(len(str(total)))) + + if len(errors) == 0: + sys.stdout.write(OK) + sys.stdout.write(progress_prefix + "%s passed in %s mode.\n" % (target, mode)) + sys.stderr.write(ENDC) + if normalization == True: + times_normalized.append(time_taken) + else: + times_default.append(time_taken) + total_passed += 1 + else: + sys.stderr.write(FAIL) + sys.stderr.write(progress_prefix + "%s TEST CASE FAILED, ERRORS BELOW\n" % target) + sys.stderr.write("Mode: %s\n" % mode) + sys.stderr.write("=======================================\n") + for error in errors: + unicode_stderr.write(error + "\n") + sys.stderr.write("=======================================\n") + sys.stderr.write(ENDC) + total_errors += len(errors) + total_failed += 1 + done += 1 + + if len(times_default) > 0: + average_default = int(sum(times_default) / float(len(times_default))) + min_default = min(times_default) + max_default = max(times_default) + sys.stdout.write( + "Timing in 
default mode: %dms avg, %dms min, %dms max\n" % (average_default, min_default, max_default)) + + if len(times_normalized) > 0: + average_normalized = int(sum(times_normalized) / float(len(times_normalized))) + min_normalized = min(times_normalized) + max_normalized = max(times_normalized) + sys.stdout.write("Timing in normalized mode: %dms avg, %dms min, %dms max\n" % ( + average_normalized, min_normalized, max_normalized)) + + if total_failed == 0: + sys.stdout.write(OK) + sys.stdout.write("All tests passed!\n") + sys.stderr.write(ENDC) + else: + sys.stdout.write(FAIL) + sys.stdout.write("%d tests failed, %d errors in total.\n" % (total_failed, total_errors)) + sys.stderr.write(ENDC) + exit(1) + + elif args.mode[0] == "update": - errors = False - updates = [] - for target in targets: - try: - with codecs.open(os.path.join("test/data", target), "r") as f: - data = f.read().split("\n--\n") - updates.append((target, data)) - except IOError as e: - sys.stderr.write("Invalid domain %(domain)s specified. No base data exists.\n" % {"domain": target}) - errors = True - continue - - if errors: - exit(1) - - for target, data in updates: - default = pythonwhois.parse.parse_raw_whois(data) - normalized = pythonwhois.parse.parse_raw_whois(data, normalized=True) - with codecs.open(os.path.join("test/target_default", target), "w") as f: - f.write(encoded_json_dumps(default)) - with codecs.open(os.path.join("test/target_normalized", target), "w") as f: - f.write(encoded_json_dumps(normalized)) - print("Generated target data for %s." % target) + errors = False + updates = [] + for target in targets: + try: + with codecs.open(os.path.join("test/data", target), "r") as f: + data = f.read().split("\n--\n") + updates.append((target, data)) + except IOError as e: + sys.stderr.write("Invalid domain %(domain)s specified. 
No base data exists.\n" % {"domain": target}) + errors = True + continue + + if errors: + exit(1) + + for target, data in updates: + default = pythonwhois.parse.parse_raw_whois(data) + normalized = pythonwhois.parse.parse_raw_whois(data, normalized=True) + with codecs.open(os.path.join("test/target_default", target), "w") as f: + f.write(encoded_json_dumps(default)) + with codecs.open(os.path.join("test/target_normalized", target), "w") as f: + f.write(encoded_json_dumps(normalized)) + print("Generated target data for %s." % target) diff --git a/test/data/google.durban b/test/data/google.durban new file mode 100644 index 0000000..de5e3e3 --- /dev/null +++ b/test/data/google.durban @@ -0,0 +1,85 @@ +Domain Name: google.durban +Domain ID: dom_P--1 +WHOIS Server: durban-whois11.dns.net.za + +Updated Date: 2014-12-01T06:00:03Z +Creation Date: 2014-09-30T22:00:03Z +Registry Expiry Date: 2017-09-30T22:00:03Z +Sponsoring Registrar: MarkMonitor +Sponsoring Registrar IANA ID: 292 +Domain Status: clientDeleteProhibited https://icann.org/epp#clientDeleteProhibited +Domain Status: clientTransferProhibited https://icann.org/epp#clientTransferProhibited +Domain Status: clientUpdateProhibited https://icann.org/epp#clientUpdateProhibited + +Registrant ID: mmr-87489 +Registrant Name: Google Inc. +Registrant Organization: +Registrant Street: 1600 Amphitheatre Parkway +Registrant City: Mountain View +Registrant State/Province: CA +Registrant Postal Code: 94043 +Registrant Country: US +Registrant Phone: +1.6502530000 +Registrant Phone Ext: +Registrant Fax: +1.6502530001 +Registrant Fax Ext: +Registrant Email: dns-admin@google.com + +Admin ID: mmr-87489 +Admin Name: Google Inc. 
+Admin Organization: +Admin Street: 1600 Amphitheatre Parkway +Admin City: Mountain View +Admin State/Province: CA +Admin Postal Code: 94043 +Admin Country: US +Admin Phone: +1.6502530000 +Admin Phone Ext: +Admin Fax: +1.6502530001 +Admin Fax Ext: +Admin Email: dns-admin@google.com + +Billing ID: mmr-87489 +Billing Name: Google Inc. +Billing Organization: +Billing Street: 1600 Amphitheatre Parkway +Billing City: Mountain View +Billing State/Province: CA +Billing Postal Code: 94043 +Billing Country: US +Billing Phone: +1.6502530000 +Billing Phone Ext: +Billing Fax: +1.6502530001 +Billing Fax Ext: +Billing Email: dns-admin@google.com + +Tech ID: mmr-87489 +Tech Name: Google Inc. +Tech Organization: +Tech Street: 1600 Amphitheatre Parkway +Tech City: Mountain View +Tech State/Province: CA +Tech Postal Code: 94043 +Tech Country: US +Tech Phone: +1.6502530000 +Tech Phone Ext: +Tech Fax: +1.6502530001 +Tech Fax Ext: +Tech Email: dns-admin@google.com + + +Name Server: ns2.google.com +Name Server: ns4.google.com +Name Server: ns3.google.com +Name Server: ns1.google.com +DNSSEC: unsigned +>>> Last update of WHOIS database: 2016-06-06T08:13:55Z <<< + +# WHOIS lookup made at 2016-06-06T08:13:55Z +# -- +# For more information on Whois status codes, please visit https://icann.org/epp +# +# The use of this Whois facility is subject to the following terms and +# conditions. https://registry.net.za/whois_terms +# Copyright (c) ZACR 1995-2016 + diff --git a/test/data/google.eu b/test/data/google.eu new file mode 100644 index 0000000..fd7c926 --- /dev/null +++ b/test/data/google.eu @@ -0,0 +1,66 @@ +% The WHOIS service offered by EURid and the access to the records +% in the EURid WHOIS database are provided for information purposes +% only. It allows persons to check whether a specific domain name +% is still available or not and to obtain information related to +% the registration records of existing domain names. 
+% +% EURid cannot, under any circumstances, be held liable in case the +% stored information would prove to be wrong, incomplete or not +% accurate in any sense. +% +% By submitting a query you agree not to use the information made +% available to: +% +% - allow, enable or otherwise support the transmission of unsolicited, +% commercial advertising or other solicitations whether via email or +% otherwise; +% - target advertising in any possible way; +% +% - to cause nuisance in any possible way to the registrants by sending +% (whether by automated, electronic processes capable of enabling +% high volumes or other possible means) messages to them. +% +% Without prejudice to the above, it is explicitly forbidden to extract, +% copy and/or use or re-utilise in any form and by any means +% (electronically or not) the whole or a quantitatively or qualitatively +% substantial part of the contents of the WHOIS database without prior +% and explicit permission by EURid, nor in any attempt hereof, to apply +% automated, electronic processes to EURid (or its systems). +% +% You agree that any reproduction and/or transmission of data for +% commercial purposes will always be considered as the extraction of a +% substantial part of the content of the WHOIS database. +% +% By submitting the query you agree to abide by this policy and accept +% that EURid can take measures to limit the use of its WHOIS services +% in order to protect the privacy of its registrants or the integrity +% of the database. +% +% The EURid WHOIS service on port 43 (textual whois) never +% discloses any information concerning the registrant. +% Registrant and onsite contact information can be obtained through use of the +% webbased whois service available from the EURid website www.eurid.eu +% +% WHOIS google.eu +Domain: google.eu + +Registrant: + NOT DISCLOSED! + Visit www.eurid.eu for webbased whois. + +Onsite(s): + NOT DISCLOSED! + Visit www.eurid.eu for webbased whois. 
+ +Registrar: + Name: MarkMonitor Inc. + Website: www.markmonitor.com + +Name servers: + ns3.google.com + ns4.google.com + ns1.google.com + ns2.google.com + +Please visit www.eurid.eu for more info. + diff --git a/test/data/google.luxury b/test/data/google.luxury new file mode 100644 index 0000000..9c85371 --- /dev/null +++ b/test/data/google.luxury @@ -0,0 +1,68 @@ +Domain Name: google.luxury +Domain ID: D9E3E900552D34B54856B0DAB0A9286FD-ARI +WHOIS Server: +Referral URL: https://www.markmonitor.com/ +Updated Date: 2016-03-15T10:22:05Z +Creation Date: 2014-04-11T10:46:41Z +Registry Expiry Date: 2017-04-11T10:46:41Z +Sponsoring Registrar: MarkMonitor Inc +Sponsoring Registrar IANA ID: 292 +Domain Status: clientDeleteProhibited https://icann.org/epp#clientDeleteProhibited +Domain Status: clientTransferProhibited https://icann.org/epp#clientTransferProhibited +Domain Status: clientUpdateProhibited https://icann.org/epp#clientUpdateProhibited +Registrant ID: CDE1C33C5B2534A75AC903887FF4F6063-ARI +Registrant Name: DNS Admin +Registrant Organization: Google Inc. +Registrant Street: 1600 Amphitheatre Parkway +Registrant Street: +Registrant Street: +Registrant City: Mountain View +Registrant State/Province: CA +Registrant Postal Code: 94043 +Registrant Country: US +Registrant Phone: +1.6502530000 +Registrant Phone Ext: +Registrant Fax: +1.6502530001 +Registrant Fax Ext: +Registrant Email: dns-admin@google.com +Admin ID: CDE1C33C5B2534A75AC903887FF4F6063-ARI +Admin Name: DNS Admin +Admin Organization: Google Inc. +Admin Street: 1600 Amphitheatre Parkway +Admin Street: +Admin Street: +Admin City: Mountain View +Admin State/Province: CA +Admin Postal Code: 94043 +Admin Country: US +Admin Phone: +1.6502530000 +Admin Phone Ext: +Admin Fax: +1.6502530001 +Admin Fax Ext: +Admin Email: dns-admin@google.com +Tech ID: CDE1C33C5B2534A75AC903887FF4F6063-ARI +Tech Name: DNS Admin +Tech Organization: Google Inc. 
+Tech Street: 1600 Amphitheatre Parkway +Tech Street: +Tech Street: +Tech City: Mountain View +Tech State/Province: CA +Tech Postal Code: 94043 +Tech Country: US +Tech Phone: +1.6502530000 +Tech Phone Ext: +Tech Fax: +1.6502530001 +Tech Fax Ext: +Tech Email: dns-admin@google.com +Name Server: ns4.googledomains.com +Name Server: ns2.googledomains.com +Name Server: ns3.googledomains.com +Name Server: ns1.googledomains.com +DNSSEC: unsigned +>>> Last update of WHOIS database: 2016-06-06T08:01:55Z <<< + +For more information on Whois status codes, please visit https://icann.org/epp + +This is the future of Luxury. .LUXURY is the new generic top level domain whose mission is to provide a dedicated digital platform for all things luxury. Designed to meet the distinct needs of the luxury industry, .LUXURY offers product manufacturers, service providers, retailers and consumers a central place online to engage, transact and celebrate. .LUXURY provides a truly comprehensive online platform - one never before available in the luxury market. We've been expecting you. + diff --git a/test/data/noir.haus b/test/data/noir.haus new file mode 100644 index 0000000..1d389eb --- /dev/null +++ b/test/data/noir.haus @@ -0,0 +1,126 @@ +Domain Name: noir.haus +Registry Domain ID: ce0a53bd44534f7aa261a54f8817c7ca-rside +Registrar WHOIS Server: whois.1and1.com +Registrar URL: http://1and1.com +Updated Date: 2015-11-23T00:11:30.987Z +Creation Date: 2014-11-23T00:11:16.903Z +Registrar Registration Expiration Date: 2016-11-23T00:11:16.903Z +Registrar: 1&1 Internet SE +Registrar IANA ID: 83 +Registrar Abuse Contact Email: abuse@1and1.com +Registrar Abuse Contact Phone: +1.8774612631 +Reseller: +Domain Status: clientTransferProhibited https://www.icann.org/epp#clientTransferProhibited +Domain Status: autoRenewPeriod https://www.icann.org/epp#autoRenewPeriod +Registry Registrant ID: 2s356hx7ryj +Registrant Name: Andres Flores +Registrant Organization: Noir Haus Media, inc. 
+Registrant Street: 4106 N 22nd St #2 +Registrant Street: +Registrant City: McAllen +Registrant State/Province: TX +Registrant Postal Code: 78504 +Registrant Country: US +Registrant Phone: +281.2450277 +Registrant Phone Ext: +Registrant Fax: +Registrant Fax Ext: +Registrant Email: sirEmmett@icloud.com +Registry Admin ID: 2s356mv337a +Admin Name: Andres Flores +Admin Organization: Noir Haus Media, inc. +Admin Street: 4106 N 22nd St #2 +Admin Street: +Admin City: McAllen +Admin State/Province: TX +Admin Postal Code: 78504 +Admin Country: US +Admin Phone: +281.2450277 +Admin Phone Ext: +Admin Fax: +Admin Fax Ext: +Admin Email: sirEmmett@icloud.com +Registry Tech ID: 2s356jjywoa +Tech Name: Andres Flores +Tech Organization: Noir Haus Media, inc. +Tech Street: 4106 N 22nd St #2 +Tech Street: +Tech City: McAllen +Tech State/Province: TX +Tech Postal Code: 78504 +Tech Country: US +Tech Phone: +281.2450277 +Tech Phone Ext: +Tech Fax: +Tech Fax Ext: +Tech Email: sirEmmett@icloud.com +Nameserver: ns-us.1and1-dns.com +Nameserver: ns-us.1and1-dns.de +Nameserver: ns-us.1and1-dns.org +Nameserver: ns-us.1and1-dns.us +DNSSEC: Unsigned +URL of the ICANN WHOIS Data Problem Reporting System: http://wdprs.internic.net/ +>>> Last update of WHOIS database: 2016-06-06T08:08:12Z <<< + +For more information on Whois status codes, please visit https://icann.org/epp +-- +Domain Name: noir.haus +Domain ID: ce0a53bd44534f7aa261a54f8817c7ca-RSIDE +WHOIS Server: whois.schlund.info +Referral URL: http://1and1.com +Updated Date: 2016-01-07T00:11:45Z +Creation Date: 2014-11-23T00:11:16Z +Registry Expiry Date: 2016-11-23T00:11:16Z +Sponsoring Registrar: 1&1 Internet AG +Sponsoring Registrar IANA ID: 83 +Domain Status: clientTransferProhibited https://icann.org/epp#clientTransferProhibited +Registrant ID: 2s356hx7ryj +Registrant Name: Andres Flores +Registrant Organization: Noir Haus Media, inc. 
+Registrant Street: 4106 N 22nd St #2 +Registrant City: McAllen +Registrant State/Province: TX +Registrant Postal Code: 78504 +Registrant Country: US +Registrant Phone: +281.2450277 +Registrant Phone Ext: +Registrant Fax: +Registrant Fax Ext: +Registrant Email: sirEmmett@icloud.com +Admin ID: 2s356mv337a +Admin Name: Andres Flores +Admin Organization: Noir Haus Media, inc. +Admin Street: 4106 N 22nd St #2 +Admin City: McAllen +Admin State/Province: TX +Admin Postal Code: 78504 +Admin Country: US +Admin Phone: +281.2450277 +Admin Phone Ext: +Admin Fax: +Admin Fax Ext: +Admin Email: sirEmmett@icloud.com +Tech ID: 2s356jjywoa +Tech Name: Andres Flores +Tech Organization: Noir Haus Media, inc. +Tech Street: 4106 N 22nd St #2 +Tech City: McAllen +Tech State/Province: TX +Tech Postal Code: 78504 +Tech Country: US +Tech Phone: +281.2450277 +Tech Phone Ext: +Tech Fax: +Tech Fax Ext: +Tech Email: sirEmmett@icloud.com +Name Server: ns-us.1and1-dns.org +Name Server: ns-us.1and1-dns.us +Name Server: ns-us.1and1-dns.com +Name Server: ns-us.1and1-dns.de +DNSSEC: unsigned +>>> Last update of WHOIS database: 2016-06-06T08:11:11Z <<< + +For more information on Whois status codes, please visit https://icann.org/epp + +Terms of Use: Users accessing the United TLD WHOIS service agree to use the data only for lawful purposes, and under no circumstances may this data be used to: Allow, enable, or otherwise support the transmission by e-mail, telephone, or facsimile of mass unsolicited, commercial advertising or solicitations to entities other than the registrar's own existing customers. Enable high volume, automated, electronic processes that send queries or data to the systems of United TLD or any ICANN-accredited registrar, except as reasonably necessary to register domain names or modify existing registrations. When using the United TLD Whois service, please consider the following: The Whois service is not a replacement for standard EPP commands to the SRS service. 
Whois is not considered authoritative for registered domain objects. The Whois service may be scheduled for downtime during production or OT&E maintenance periods. Queries to the Whois services are throttled. If too many queries are received from a single IP address within a specified time, the service will begin to reject further queries for a period of time to prevent disruption of Whois service access. Abuse of the Whois system through data mining is mitigated by detecting and limiting bulk query access from single sources. + diff --git a/test/target_default/google.durban b/test/target_default/google.durban new file mode 100644 index 0000000..3b16a81 --- /dev/null +++ b/test/target_default/google.durban @@ -0,0 +1 @@ +{"status": ["clientDeleteProhibited https://icann.org/epp#clientDeleteProhibited", "clientTransferProhibited https://icann.org/epp#clientTransferProhibited", "clientUpdateProhibited https://icann.org/epp#clientUpdateProhibited"], "updated_date": ["2014-12-01T06:00:03"], "contacts": {"admin": {"city": "Mountain View", "fax": "+1.6502530001", "handle": "mmr-87489", "name": "Google Inc.", "phone": "+1.6502530000", "state": "CA", "street": "1600 Amphitheatre Parkway", "country": "US", "postalcode": "94043", "email": "dns-admin@google.com"}, "tech": {"city": "Mountain View", "fax": "+1.6502530001", "handle": "mmr-87489", "name": "Google Inc.", "phone": "+1.6502530000", "state": "CA", "street": "1600 Amphitheatre Parkway", "country": "US", "postalcode": "94043", "email": "dns-admin@google.com"}, "registrant": {"city": "Mountain View", "fax": "+1.6502530001", "handle": "mmr-87489", "name": "Google Inc.", "phone": "+1.6502530000", "state": "CA", "street": "1600 Amphitheatre Parkway", "country": "US", "postalcode": "94043", "email": "dns-admin@google.com"}, "billing": {"city": "Mountain View", "fax": "+1.6502530001", "handle": "mmr-87489", "name": "Google Inc.", "phone": "+1.6502530000", "state": "CA", "street": "1600 Amphitheatre Parkway", "country": "US", 
"postalcode": "94043", "email": "dns-admin@google.com"}}, "nameservers": ["ns2.google.com", "ns4.google.com", "ns3.google.com", "ns1.google.com"], "expiration_date": ["2017-09-30T22:00:03"], "raw": ["Domain Name: google.durban\nDomain ID: dom_P--1\nWHOIS Server: durban-whois11.dns.net.za\n\nUpdated Date: 2014-12-01T06:00:03Z\nCreation Date: 2014-09-30T22:00:03Z\nRegistry Expiry Date: 2017-09-30T22:00:03Z\nSponsoring Registrar: MarkMonitor\nSponsoring Registrar IANA ID: 292\nDomain Status: clientDeleteProhibited https://icann.org/epp#clientDeleteProhibited\nDomain Status: clientTransferProhibited https://icann.org/epp#clientTransferProhibited\nDomain Status: clientUpdateProhibited https://icann.org/epp#clientUpdateProhibited\n\nRegistrant ID: mmr-87489\nRegistrant Name: Google Inc.\nRegistrant Organization: \nRegistrant Street: 1600 Amphitheatre Parkway \nRegistrant City: Mountain View\nRegistrant State/Province: CA\nRegistrant Postal Code: 94043\nRegistrant Country: US\nRegistrant Phone: +1.6502530000\nRegistrant Phone Ext: \nRegistrant Fax: +1.6502530001\nRegistrant Fax Ext: \nRegistrant Email: dns-admin@google.com\n\nAdmin ID: mmr-87489\nAdmin Name: Google Inc.\nAdmin Organization: \nAdmin Street: 1600 Amphitheatre Parkway \nAdmin City: Mountain View\nAdmin State/Province: CA\nAdmin Postal Code: 94043\nAdmin Country: US\nAdmin Phone: +1.6502530000\nAdmin Phone Ext: \nAdmin Fax: +1.6502530001\nAdmin Fax Ext: \nAdmin Email: dns-admin@google.com\n\nBilling ID: mmr-87489\nBilling Name: Google Inc.\nBilling Organization: \nBilling Street: 1600 Amphitheatre Parkway \nBilling City: Mountain View\nBilling State/Province: CA\nBilling Postal Code: 94043\nBilling Country: US\nBilling Phone: +1.6502530000\nBilling Phone Ext: \nBilling Fax: +1.6502530001\nBilling Fax Ext: \nBilling Email: dns-admin@google.com\n\nTech ID: mmr-87489\nTech Name: Google Inc.\nTech Organization: \nTech Street: 1600 Amphitheatre Parkway \nTech City: Mountain View\nTech State/Province: CA\nTech 
Postal Code: 94043\nTech Country: US\nTech Phone: +1.6502530000\nTech Phone Ext: \nTech Fax: +1.6502530001\nTech Fax Ext: \nTech Email: dns-admin@google.com\n\n\nName Server: ns2.google.com\nName Server: ns4.google.com\nName Server: ns3.google.com\nName Server: ns1.google.com\nDNSSEC: unsigned\n>>> Last update of WHOIS database: 2016-06-06T08:13:55Z <<<\n\n# WHOIS lookup made at 2016-06-06T08:13:55Z\n# --\n# For more information on Whois status codes, please visit https://icann.org/epp\n#\n# The use of this Whois facility is subject to the following terms and\n# conditions. https://registry.net.za/whois_terms\n# Copyright (c) ZACR 1995-2016\n\n"], "whois_server": ["durban-whois11.dns.net.za"], "registrar": ["MarkMonitor"], "creation_date": ["2014-09-30T22:00:03"], "id": ["dom_P--1"]} \ No newline at end of file diff --git a/test/target_default/google.eu b/test/target_default/google.eu new file mode 100644 index 0000000..5c43ef2 --- /dev/null +++ b/test/target_default/google.eu @@ -0,0 +1 @@ +{"nameservers": ["ns3.google.com", "ns4.google.com", "ns1.google.com", "ns2.google.com"], "raw": ["% The WHOIS service offered by EURid and the access to the records\n% in the EURid WHOIS database are provided for information purposes\n% only. 
It allows persons to check whether a specific domain name\n% is still available or not and to obtain information related to\n% the registration records of existing domain names.\n%\n% EURid cannot, under any circumstances, be held liable in case the\n% stored information would prove to be wrong, incomplete or not\n% accurate in any sense.\n%\n% By submitting a query you agree not to use the information made\n% available to:\n%\n% - allow, enable or otherwise support the transmission of unsolicited,\n% commercial advertising or other solicitations whether via email or\n% otherwise;\n% - target advertising in any possible way;\n%\n% - to cause nuisance in any possible way to the registrants by sending\n% (whether by automated, electronic processes capable of enabling\n% high volumes or other possible means) messages to them.\n%\n% Without prejudice to the above, it is explicitly forbidden to extract,\n% copy and/or use or re-utilise in any form and by any means\n% (electronically or not) the whole or a quantitatively or qualitatively\n% substantial part of the contents of the WHOIS database without prior\n% and explicit permission by EURid, nor in any attempt hereof, to apply\n% automated, electronic processes to EURid (or its systems).\n%\n% You agree that any reproduction and/or transmission of data for\n% commercial purposes will always be considered as the extraction of a\n% substantial part of the content of the WHOIS database.\n%\n% By submitting the query you agree to abide by this policy and accept\n% that EURid can take measures to limit the use of its WHOIS services\n% in order to protect the privacy of its registrants or the integrity\n% of the database.\n%\n% The EURid WHOIS service on port 43 (textual whois) never\n% discloses any information concerning the registrant.\n% Registrant and onsite contact information can be obtained through use of the\n% webbased whois service available from the EURid website www.eurid.eu\n%\n% WHOIS google.eu\nDomain: 
google.eu\n\nRegistrant:\n NOT DISCLOSED!\n Visit www.eurid.eu for webbased whois.\n\nOnsite(s):\n NOT DISCLOSED!\n Visit www.eurid.eu for webbased whois.\n\nRegistrar:\n Name: MarkMonitor Inc.\n Website: www.markmonitor.com\n\nName servers:\n ns3.google.com\n ns4.google.com\n ns1.google.com\n ns2.google.com\n\nPlease visit www.eurid.eu for more info.\n\n"], "registrar": ["MarkMonitor Inc."], "contacts": {"admin": null, "tech": null, "registrant": null, "billing": null}} \ No newline at end of file diff --git a/test/target_default/google.luxury b/test/target_default/google.luxury new file mode 100644 index 0000000..c112faf --- /dev/null +++ b/test/target_default/google.luxury @@ -0,0 +1 @@ +{"status": ["clientDeleteProhibited https://icann.org/epp#clientDeleteProhibited", "clientTransferProhibited https://icann.org/epp#clientTransferProhibited", "clientUpdateProhibited https://icann.org/epp#clientUpdateProhibited"], "updated_date": ["2016-03-15T10:22:05"], "contacts": {"admin": {"city": "Mountain View", "fax": "+1.6502530001", "street": "1600 Amphitheatre Parkway", "handle": "CDE1C33C5B2534A75AC903887FF4F6063-ARI", "name": "DNS Admin", "country": "US", "organization": "Google Inc.", "postal": "94043", "email": "dns-admin@google.com", "phone": "+1.6502530000"}, "tech": {"city": "Mountain View", "fax": "+1.6502530001", "street": "1600 Amphitheatre Parkway", "handle": "CDE1C33C5B2534A75AC903887FF4F6063-ARI", "name": "DNS Admin", "country": "US", "organization": "Google Inc.", "postal": "94043", "email": "dns-admin@google.com", "phone": "+1.6502530000"}, "registrant": {"city": "Mountain View", "fax": "+1.6502530001", "street": "1600 Amphitheatre Parkway", "handle": "CDE1C33C5B2534A75AC903887FF4F6063-ARI", "name": "DNS Admin", "country": "US", "organization": "Google Inc.", "postal": "94043", "email": "dns-admin@google.com", "phone": "+1.6502530000"}, "billing": null}, "nameservers": ["ns4.googledomains.com", "ns2.googledomains.com", "ns3.googledomains.com", 
"ns1.googledomains.com"], "expiration_date": ["2017-04-11T10:46:41"], "raw": ["Domain Name: google.luxury\nDomain ID: D9E3E900552D34B54856B0DAB0A9286FD-ARI\nWHOIS Server:\nReferral URL: https://www.markmonitor.com/\nUpdated Date: 2016-03-15T10:22:05Z\nCreation Date: 2014-04-11T10:46:41Z\nRegistry Expiry Date: 2017-04-11T10:46:41Z\nSponsoring Registrar: MarkMonitor Inc\nSponsoring Registrar IANA ID: 292\nDomain Status: clientDeleteProhibited https://icann.org/epp#clientDeleteProhibited\nDomain Status: clientTransferProhibited https://icann.org/epp#clientTransferProhibited\nDomain Status: clientUpdateProhibited https://icann.org/epp#clientUpdateProhibited\nRegistrant ID: CDE1C33C5B2534A75AC903887FF4F6063-ARI\nRegistrant Name: DNS Admin\nRegistrant Organization: Google Inc.\nRegistrant Street: 1600 Amphitheatre Parkway\nRegistrant Street:\nRegistrant Street:\nRegistrant City: Mountain View\nRegistrant State/Province: CA\nRegistrant Postal Code: 94043\nRegistrant Country: US\nRegistrant Phone: +1.6502530000\nRegistrant Phone Ext:\nRegistrant Fax: +1.6502530001\nRegistrant Fax Ext:\nRegistrant Email: dns-admin@google.com\nAdmin ID: CDE1C33C5B2534A75AC903887FF4F6063-ARI\nAdmin Name: DNS Admin\nAdmin Organization: Google Inc.\nAdmin Street: 1600 Amphitheatre Parkway\nAdmin Street:\nAdmin Street:\nAdmin City: Mountain View\nAdmin State/Province: CA\nAdmin Postal Code: 94043\nAdmin Country: US\nAdmin Phone: +1.6502530000\nAdmin Phone Ext:\nAdmin Fax: +1.6502530001\nAdmin Fax Ext:\nAdmin Email: dns-admin@google.com\nTech ID: CDE1C33C5B2534A75AC903887FF4F6063-ARI\nTech Name: DNS Admin\nTech Organization: Google Inc.\nTech Street: 1600 Amphitheatre Parkway\nTech Street:\nTech Street:\nTech City: Mountain View\nTech State/Province: CA\nTech Postal Code: 94043\nTech Country: US\nTech Phone: +1.6502530000\nTech Phone Ext:\nTech Fax: +1.6502530001\nTech Fax Ext:\nTech Email: dns-admin@google.com\nName Server: ns4.googledomains.com\nName Server: ns2.googledomains.com\nName Server: 
ns3.googledomains.com\nName Server: ns1.googledomains.com\nDNSSEC: unsigned\n>>> Last update of WHOIS database: 2016-06-06T08:01:55Z <<<\n\nFor more information on Whois status codes, please visit https://icann.org/epp\n\nThis is the future of Luxury. .LUXURY is the new generic top level domain whose mission is to provide a dedicated digital platform for all things luxury. Designed to meet the distinct needs of the luxury industry, .LUXURY offers product manufacturers, service providers, retailers and consumers a central place online to engage, transact and celebrate. .LUXURY provides a truly comprehensive online platform - one never before available in the luxury market. We've been expecting you.\n\n"], "registrar": ["MarkMonitor Inc"], "creation_date": ["2014-04-11T10:46:41"], "id": ["D9E3E900552D34B54856B0DAB0A9286FD-ARI"]} \ No newline at end of file diff --git a/test/target_default/noir.haus b/test/target_default/noir.haus new file mode 100644 index 0000000..418d7b4 --- /dev/null +++ b/test/target_default/noir.haus @@ -0,0 +1 @@ +{"status": ["clientTransferProhibited https://www.icann.org/epp#clientTransferProhibited", "autoRenewPeriod https://www.icann.org/epp#autoRenewPeriod"], "updated_date": ["2015-11-23T00:11:30"], "contacts": {"admin": {"city": "McAllen", "handle": "2s356mv337a", "name": "Andres Flores", "phone": "+281.2450277", "state": "TX", "street": "4106 N 22nd St #2", "country": "US", "postalcode": "78504", "organization": "Noir Haus Media, inc.", "email": "sirEmmett@icloud.com"}, "tech": {"city": "McAllen", "handle": "2s356jjywoa", "name": "Andres Flores", "phone": "+281.2450277", "state": "TX", "street": "4106 N 22nd St #2", "country": "US", "postalcode": "78504", "organization": "Noir Haus Media, inc.", "email": "sirEmmett@icloud.com"}, "registrant": {"city": "McAllen", "handle": "2s356hx7ryj", "name": "Andres Flores", "phone": "+281.2450277", "state": "TX", "street": "4106 N 22nd St #2", "country": "US", "postalcode": "78504", "organization": 
"Noir Haus Media, inc.", "email": "sirEmmett@icloud.com"}, "billing": null}, "nameservers": ["ns-us.1and1-dns.com", "ns-us.1and1-dns.de", "ns-us.1and1-dns.org", "ns-us.1and1-dns.us"], "expiration_date": ["2016-11-23T00:11:16"], "emails": ["abuse@1and1.com"], "raw": ["Domain Name: noir.haus\nRegistry Domain ID: ce0a53bd44534f7aa261a54f8817c7ca-rside\nRegistrar WHOIS Server: whois.1and1.com\nRegistrar URL: http://1and1.com\nUpdated Date: 2015-11-23T00:11:30.987Z\nCreation Date: 2014-11-23T00:11:16.903Z\nRegistrar Registration Expiration Date: 2016-11-23T00:11:16.903Z\nRegistrar: 1&1 Internet SE\nRegistrar IANA ID: 83\nRegistrar Abuse Contact Email: abuse@1and1.com\nRegistrar Abuse Contact Phone: +1.8774612631\nReseller: \nDomain Status: clientTransferProhibited https://www.icann.org/epp#clientTransferProhibited\nDomain Status: autoRenewPeriod https://www.icann.org/epp#autoRenewPeriod\nRegistry Registrant ID: 2s356hx7ryj\nRegistrant Name: Andres Flores\nRegistrant Organization: Noir Haus Media, inc.\nRegistrant Street: 4106 N 22nd St #2\nRegistrant Street: \nRegistrant City: McAllen\nRegistrant State/Province: TX\nRegistrant Postal Code: 78504\nRegistrant Country: US\nRegistrant Phone: +281.2450277\nRegistrant Phone Ext: \nRegistrant Fax: \nRegistrant Fax Ext: \nRegistrant Email: sirEmmett@icloud.com\nRegistry Admin ID: 2s356mv337a\nAdmin Name: Andres Flores\nAdmin Organization: Noir Haus Media, inc.\nAdmin Street: 4106 N 22nd St #2\nAdmin Street: \nAdmin City: McAllen\nAdmin State/Province: TX\nAdmin Postal Code: 78504\nAdmin Country: US\nAdmin Phone: +281.2450277\nAdmin Phone Ext: \nAdmin Fax: \nAdmin Fax Ext: \nAdmin Email: sirEmmett@icloud.com\nRegistry Tech ID: 2s356jjywoa\nTech Name: Andres Flores\nTech Organization: Noir Haus Media, inc.\nTech Street: 4106 N 22nd St #2\nTech Street: \nTech City: McAllen\nTech State/Province: TX\nTech Postal Code: 78504\nTech Country: US\nTech Phone: +281.2450277\nTech Phone Ext: \nTech Fax: \nTech Fax Ext: \nTech Email: 
sirEmmett@icloud.com\nNameserver: ns-us.1and1-dns.com\nNameserver: ns-us.1and1-dns.de\nNameserver: ns-us.1and1-dns.org\nNameserver: ns-us.1and1-dns.us\nDNSSEC: Unsigned\nURL of the ICANN WHOIS Data Problem Reporting System: http://wdprs.internic.net/\n>>> Last update of WHOIS database: 2016-06-06T08:08:12Z <<<\n\nFor more information on Whois status codes, please visit https://icann.org/epp", "Domain Name: noir.haus\nDomain ID: ce0a53bd44534f7aa261a54f8817c7ca-RSIDE\nWHOIS Server: whois.schlund.info\nReferral URL: http://1and1.com\nUpdated Date: 2016-01-07T00:11:45Z\nCreation Date: 2014-11-23T00:11:16Z\nRegistry Expiry Date: 2016-11-23T00:11:16Z\nSponsoring Registrar: 1&1 Internet AG\nSponsoring Registrar IANA ID: 83\nDomain Status: clientTransferProhibited https://icann.org/epp#clientTransferProhibited\nRegistrant ID: 2s356hx7ryj\nRegistrant Name: Andres Flores\nRegistrant Organization: Noir Haus Media, inc.\nRegistrant Street: 4106 N 22nd St #2\nRegistrant City: McAllen\nRegistrant State/Province: TX\nRegistrant Postal Code: 78504\nRegistrant Country: US\nRegistrant Phone: +281.2450277\nRegistrant Phone Ext: \nRegistrant Fax: \nRegistrant Fax Ext: \nRegistrant Email: sirEmmett@icloud.com\nAdmin ID: 2s356mv337a\nAdmin Name: Andres Flores\nAdmin Organization: Noir Haus Media, inc.\nAdmin Street: 4106 N 22nd St #2\nAdmin City: McAllen\nAdmin State/Province: TX\nAdmin Postal Code: 78504\nAdmin Country: US\nAdmin Phone: +281.2450277\nAdmin Phone Ext: \nAdmin Fax: \nAdmin Fax Ext: \nAdmin Email: sirEmmett@icloud.com\nTech ID: 2s356jjywoa\nTech Name: Andres Flores\nTech Organization: Noir Haus Media, inc.\nTech Street: 4106 N 22nd St #2\nTech City: McAllen\nTech State/Province: TX\nTech Postal Code: 78504\nTech Country: US\nTech Phone: +281.2450277\nTech Phone Ext: \nTech Fax: \nTech Fax Ext: \nTech Email: sirEmmett@icloud.com\nName Server: ns-us.1and1-dns.org\nName Server: ns-us.1and1-dns.us\nName Server: ns-us.1and1-dns.com\nName Server: ns-us.1and1-dns.de\nDNSSEC: 
unsigned\n>>> Last update of WHOIS database: 2016-06-06T08:11:11Z <<<\n\nFor more information on Whois status codes, please visit https://icann.org/epp\n\nTerms of Use: Users accessing the United TLD WHOIS service agree to use the data only for lawful purposes, and under no circumstances may this data be used to: Allow, enable, or otherwise support the transmission by e-mail, telephone, or facsimile of mass unsolicited, commercial advertising or solicitations to entities other than the registrar's own existing customers. Enable high volume, automated, electronic processes that send queries or data to the systems of United TLD or any ICANN-accredited registrar, except as reasonably necessary to register domain names or modify existing registrations. When using the United TLD Whois service, please consider the following: The Whois service is not a replacement for standard EPP commands to the SRS service. Whois is not considered authoritative for registered domain objects. The Whois service may be scheduled for downtime during production or OT&E maintenance periods. Queries to the Whois services are throttled. If too many queries are received from a single IP address within a specified time, the service will begin to reject further queries for a period of time to prevent disruption of Whois service access. 
Abuse of the Whois system through data mining is mitigated by detecting and limiting bulk query access from single sources.\n\n"], "whois_server": ["whois.1and1.com"], "registrar": ["1&1 Internet SE"], "creation_date": ["2014-11-23T00:11:16"], "id": ["ce0a53bd44534f7aa261a54f8817c7ca-rside"]} \ No newline at end of file diff --git a/test/target_normalized/google.durban b/test/target_normalized/google.durban new file mode 100644 index 0000000..a903409 --- /dev/null +++ b/test/target_normalized/google.durban @@ -0,0 +1 @@ +{"status": ["clientDeleteProhibited https://icann.org/epp#clientDeleteProhibited", "clientTransferProhibited https://icann.org/epp#clientTransferProhibited", "clientUpdateProhibited https://icann.org/epp#clientUpdateProhibited"], "updated_date": ["2014-12-01T06:00:03"], "contacts": {"admin": {"city": "Mountain View", "fax": "+1.6502530001", "handle": "mmr-87489", "phone": "+1.6502530000", "state": "California", "street": "1600 Amphitheatre Parkway", "country": "United States", "postalcode": "94043", "organization": "Google Inc.", "email": "dns-admin@google.com"}, "tech": {"city": "Mountain View", "fax": "+1.6502530001", "handle": "mmr-87489", "phone": "+1.6502530000", "state": "California", "street": "1600 Amphitheatre Parkway", "country": "United States", "postalcode": "94043", "organization": "Google Inc.", "email": "dns-admin@google.com"}, "registrant": {"city": "Mountain View", "fax": "+1.6502530001", "handle": "mmr-87489", "phone": "+1.6502530000", "state": "California", "street": "1600 Amphitheatre Parkway", "country": "United States", "postalcode": "94043", "organization": "Google Inc.", "email": "dns-admin@google.com"}, "billing": {"city": "Mountain View", "fax": "+1.6502530001", "handle": "mmr-87489", "phone": "+1.6502530000", "state": "California", "street": "1600 Amphitheatre Parkway", "country": "United States", "postalcode": "94043", "organization": "Google Inc.", "email": "dns-admin@google.com"}}, "nameservers": ["ns2.google.com", 
"ns4.google.com", "ns3.google.com", "ns1.google.com"], "expiration_date": ["2017-09-30T22:00:03"], "raw": ["Domain Name: google.durban\nDomain ID: dom_P--1\nWHOIS Server: durban-whois11.dns.net.za\n\nUpdated Date: 2014-12-01T06:00:03Z\nCreation Date: 2014-09-30T22:00:03Z\nRegistry Expiry Date: 2017-09-30T22:00:03Z\nSponsoring Registrar: MarkMonitor\nSponsoring Registrar IANA ID: 292\nDomain Status: clientDeleteProhibited https://icann.org/epp#clientDeleteProhibited\nDomain Status: clientTransferProhibited https://icann.org/epp#clientTransferProhibited\nDomain Status: clientUpdateProhibited https://icann.org/epp#clientUpdateProhibited\n\nRegistrant ID: mmr-87489\nRegistrant Name: Google Inc.\nRegistrant Organization: \nRegistrant Street: 1600 Amphitheatre Parkway \nRegistrant City: Mountain View\nRegistrant State/Province: CA\nRegistrant Postal Code: 94043\nRegistrant Country: US\nRegistrant Phone: +1.6502530000\nRegistrant Phone Ext: \nRegistrant Fax: +1.6502530001\nRegistrant Fax Ext: \nRegistrant Email: dns-admin@google.com\n\nAdmin ID: mmr-87489\nAdmin Name: Google Inc.\nAdmin Organization: \nAdmin Street: 1600 Amphitheatre Parkway \nAdmin City: Mountain View\nAdmin State/Province: CA\nAdmin Postal Code: 94043\nAdmin Country: US\nAdmin Phone: +1.6502530000\nAdmin Phone Ext: \nAdmin Fax: +1.6502530001\nAdmin Fax Ext: \nAdmin Email: dns-admin@google.com\n\nBilling ID: mmr-87489\nBilling Name: Google Inc.\nBilling Organization: \nBilling Street: 1600 Amphitheatre Parkway \nBilling City: Mountain View\nBilling State/Province: CA\nBilling Postal Code: 94043\nBilling Country: US\nBilling Phone: +1.6502530000\nBilling Phone Ext: \nBilling Fax: +1.6502530001\nBilling Fax Ext: \nBilling Email: dns-admin@google.com\n\nTech ID: mmr-87489\nTech Name: Google Inc.\nTech Organization: \nTech Street: 1600 Amphitheatre Parkway \nTech City: Mountain View\nTech State/Province: CA\nTech Postal Code: 94043\nTech Country: US\nTech Phone: +1.6502530000\nTech Phone Ext: \nTech Fax: 
+1.6502530001\nTech Fax Ext: \nTech Email: dns-admin@google.com\n\n\nName Server: ns2.google.com\nName Server: ns4.google.com\nName Server: ns3.google.com\nName Server: ns1.google.com\nDNSSEC: unsigned\n>>> Last update of WHOIS database: 2016-06-06T08:13:55Z <<<\n\n# WHOIS lookup made at 2016-06-06T08:13:55Z\n# --\n# For more information on Whois status codes, please visit https://icann.org/epp\n#\n# The use of this Whois facility is subject to the following terms and\n# conditions. https://registry.net.za/whois_terms\n# Copyright (c) ZACR 1995-2016\n\n"], "whois_server": ["durban-whois11.dns.net.za"], "registrar": ["MarkMonitor"], "creation_date": ["2014-09-30T22:00:03"], "id": ["dom_P--1"]} \ No newline at end of file diff --git a/test/target_normalized/google.eu b/test/target_normalized/google.eu new file mode 100644 index 0000000..5c43ef2 --- /dev/null +++ b/test/target_normalized/google.eu @@ -0,0 +1 @@ +{"nameservers": ["ns3.google.com", "ns4.google.com", "ns1.google.com", "ns2.google.com"], "raw": ["% The WHOIS service offered by EURid and the access to the records\n% in the EURid WHOIS database are provided for information purposes\n% only. 
It allows persons to check whether a specific domain name\n% is still available or not and to obtain information related to\n% the registration records of existing domain names.\n%\n% EURid cannot, under any circumstances, be held liable in case the\n% stored information would prove to be wrong, incomplete or not\n% accurate in any sense.\n%\n% By submitting a query you agree not to use the information made\n% available to:\n%\n% - allow, enable or otherwise support the transmission of unsolicited,\n% commercial advertising or other solicitations whether via email or\n% otherwise;\n% - target advertising in any possible way;\n%\n% - to cause nuisance in any possible way to the registrants by sending\n% (whether by automated, electronic processes capable of enabling\n% high volumes or other possible means) messages to them.\n%\n% Without prejudice to the above, it is explicitly forbidden to extract,\n% copy and/or use or re-utilise in any form and by any means\n% (electronically or not) the whole or a quantitatively or qualitatively\n% substantial part of the contents of the WHOIS database without prior\n% and explicit permission by EURid, nor in any attempt hereof, to apply\n% automated, electronic processes to EURid (or its systems).\n%\n% You agree that any reproduction and/or transmission of data for\n% commercial purposes will always be considered as the extraction of a\n% substantial part of the content of the WHOIS database.\n%\n% By submitting the query you agree to abide by this policy and accept\n% that EURid can take measures to limit the use of its WHOIS services\n% in order to protect the privacy of its registrants or the integrity\n% of the database.\n%\n% The EURid WHOIS service on port 43 (textual whois) never\n% discloses any information concerning the registrant.\n% Registrant and onsite contact information can be obtained through use of the\n% webbased whois service available from the EURid website www.eurid.eu\n%\n% WHOIS google.eu\nDomain: 
google.eu\n\nRegistrant:\n NOT DISCLOSED!\n Visit www.eurid.eu for webbased whois.\n\nOnsite(s):\n NOT DISCLOSED!\n Visit www.eurid.eu for webbased whois.\n\nRegistrar:\n Name: MarkMonitor Inc.\n Website: www.markmonitor.com\n\nName servers:\n ns3.google.com\n ns4.google.com\n ns1.google.com\n ns2.google.com\n\nPlease visit www.eurid.eu for more info.\n\n"], "registrar": ["MarkMonitor Inc."], "contacts": {"admin": null, "tech": null, "registrant": null, "billing": null}} \ No newline at end of file diff --git a/test/target_normalized/google.luxury b/test/target_normalized/google.luxury new file mode 100644 index 0000000..75f2767 --- /dev/null +++ b/test/target_normalized/google.luxury @@ -0,0 +1 @@ +{"status": ["clientDeleteProhibited https://icann.org/epp#clientDeleteProhibited", "clientTransferProhibited https://icann.org/epp#clientTransferProhibited", "clientUpdateProhibited https://icann.org/epp#clientUpdateProhibited"], "updated_date": ["2016-03-15T10:22:05"], "contacts": {"admin": {"city": "Mountain View", "fax": "+1.6502530001", "street": "1600 Amphitheatre Parkway", "handle": "CDE1C33C5B2534A75AC903887FF4F6063-ARI", "name": "DNS Admin", "country": "United States", "organization": "Google Inc.", "postal": "94043", "email": "dns-admin@google.com", "phone": "+1.6502530000"}, "tech": {"city": "Mountain View", "fax": "+1.6502530001", "street": "1600 Amphitheatre Parkway", "handle": "CDE1C33C5B2534A75AC903887FF4F6063-ARI", "name": "DNS Admin", "country": "United States", "organization": "Google Inc.", "postal": "94043", "email": "dns-admin@google.com", "phone": "+1.6502530000"}, "registrant": {"city": "Mountain View", "fax": "+1.6502530001", "street": "1600 Amphitheatre Parkway", "handle": "CDE1C33C5B2534A75AC903887FF4F6063-ARI", "name": "DNS Admin", "country": "United States", "organization": "Google Inc.", "postal": "94043", "email": "dns-admin@google.com", "phone": "+1.6502530000"}, "billing": null}, "nameservers": ["ns4.googledomains.com", 
"ns2.googledomains.com", "ns3.googledomains.com", "ns1.googledomains.com"], "expiration_date": ["2017-04-11T10:46:41"], "raw": ["Domain Name: google.luxury\nDomain ID: D9E3E900552D34B54856B0DAB0A9286FD-ARI\nWHOIS Server:\nReferral URL: https://www.markmonitor.com/\nUpdated Date: 2016-03-15T10:22:05Z\nCreation Date: 2014-04-11T10:46:41Z\nRegistry Expiry Date: 2017-04-11T10:46:41Z\nSponsoring Registrar: MarkMonitor Inc\nSponsoring Registrar IANA ID: 292\nDomain Status: clientDeleteProhibited https://icann.org/epp#clientDeleteProhibited\nDomain Status: clientTransferProhibited https://icann.org/epp#clientTransferProhibited\nDomain Status: clientUpdateProhibited https://icann.org/epp#clientUpdateProhibited\nRegistrant ID: CDE1C33C5B2534A75AC903887FF4F6063-ARI\nRegistrant Name: DNS Admin\nRegistrant Organization: Google Inc.\nRegistrant Street: 1600 Amphitheatre Parkway\nRegistrant Street:\nRegistrant Street:\nRegistrant City: Mountain View\nRegistrant State/Province: CA\nRegistrant Postal Code: 94043\nRegistrant Country: US\nRegistrant Phone: +1.6502530000\nRegistrant Phone Ext:\nRegistrant Fax: +1.6502530001\nRegistrant Fax Ext:\nRegistrant Email: dns-admin@google.com\nAdmin ID: CDE1C33C5B2534A75AC903887FF4F6063-ARI\nAdmin Name: DNS Admin\nAdmin Organization: Google Inc.\nAdmin Street: 1600 Amphitheatre Parkway\nAdmin Street:\nAdmin Street:\nAdmin City: Mountain View\nAdmin State/Province: CA\nAdmin Postal Code: 94043\nAdmin Country: US\nAdmin Phone: +1.6502530000\nAdmin Phone Ext:\nAdmin Fax: +1.6502530001\nAdmin Fax Ext:\nAdmin Email: dns-admin@google.com\nTech ID: CDE1C33C5B2534A75AC903887FF4F6063-ARI\nTech Name: DNS Admin\nTech Organization: Google Inc.\nTech Street: 1600 Amphitheatre Parkway\nTech Street:\nTech Street:\nTech City: Mountain View\nTech State/Province: CA\nTech Postal Code: 94043\nTech Country: US\nTech Phone: +1.6502530000\nTech Phone Ext:\nTech Fax: +1.6502530001\nTech Fax Ext:\nTech Email: dns-admin@google.com\nName Server: 
ns4.googledomains.com\nName Server: ns2.googledomains.com\nName Server: ns3.googledomains.com\nName Server: ns1.googledomains.com\nDNSSEC: unsigned\n>>> Last update of WHOIS database: 2016-06-06T08:01:55Z <<<\n\nFor more information on Whois status codes, please visit https://icann.org/epp\n\nThis is the future of Luxury. .LUXURY is the new generic top level domain whose mission is to provide a dedicated digital platform for all things luxury. Designed to meet the distinct needs of the luxury industry, .LUXURY offers product manufacturers, service providers, retailers and consumers a central place online to engage, transact and celebrate. .LUXURY provides a truly comprehensive online platform - one never before available in the luxury market. We've been expecting you.\n\n"], "registrar": ["MarkMonitor Inc"], "creation_date": ["2014-04-11T10:46:41"], "id": ["D9E3E900552D34B54856B0DAB0A9286FD-ARI"]} \ No newline at end of file diff --git a/test/target_normalized/noir.haus b/test/target_normalized/noir.haus new file mode 100644 index 0000000..f54fb60 --- /dev/null +++ b/test/target_normalized/noir.haus @@ -0,0 +1 @@ +{"status": ["clientTransferProhibited https://www.icann.org/epp#clientTransferProhibited", "autoRenewPeriod https://www.icann.org/epp#autoRenewPeriod"], "updated_date": ["2015-11-23T00:11:30"], "contacts": {"admin": {"city": "McAllen", "handle": "2s356mv337a", "name": "Andres Flores", "phone": "+281.2450277", "state": "Texas", "street": "4106 N 22nd St #2", "country": "United States", "postalcode": "78504", "organization": "Noir Haus Media, Inc.", "email": "siremmett@icloud.com"}, "tech": {"city": "McAllen", "handle": "2s356jjywoa", "name": "Andres Flores", "phone": "+281.2450277", "state": "Texas", "street": "4106 N 22nd St #2", "country": "United States", "postalcode": "78504", "organization": "Noir Haus Media, Inc.", "email": "siremmett@icloud.com"}, "registrant": {"city": "McAllen", "handle": "2s356hx7ryj", "name": "Andres Flores", "phone": 
"+281.2450277", "state": "Texas", "street": "4106 N 22nd St #2", "country": "United States", "postalcode": "78504", "organization": "Noir Haus Media, Inc.", "email": "siremmett@icloud.com"}, "billing": null}, "nameservers": ["ns-us.1and1-dns.com", "ns-us.1and1-dns.de", "ns-us.1and1-dns.org", "ns-us.1and1-dns.us"], "expiration_date": ["2016-11-23T00:11:16"], "emails": ["abuse@1and1.com"], "raw": ["Domain Name: noir.haus\nRegistry Domain ID: ce0a53bd44534f7aa261a54f8817c7ca-rside\nRegistrar WHOIS Server: whois.1and1.com\nRegistrar URL: http://1and1.com\nUpdated Date: 2015-11-23T00:11:30.987Z\nCreation Date: 2014-11-23T00:11:16.903Z\nRegistrar Registration Expiration Date: 2016-11-23T00:11:16.903Z\nRegistrar: 1&1 Internet SE\nRegistrar IANA ID: 83\nRegistrar Abuse Contact Email: abuse@1and1.com\nRegistrar Abuse Contact Phone: +1.8774612631\nReseller: \nDomain Status: clientTransferProhibited https://www.icann.org/epp#clientTransferProhibited\nDomain Status: autoRenewPeriod https://www.icann.org/epp#autoRenewPeriod\nRegistry Registrant ID: 2s356hx7ryj\nRegistrant Name: Andres Flores\nRegistrant Organization: Noir Haus Media, inc.\nRegistrant Street: 4106 N 22nd St #2\nRegistrant Street: \nRegistrant City: McAllen\nRegistrant State/Province: TX\nRegistrant Postal Code: 78504\nRegistrant Country: US\nRegistrant Phone: +281.2450277\nRegistrant Phone Ext: \nRegistrant Fax: \nRegistrant Fax Ext: \nRegistrant Email: sirEmmett@icloud.com\nRegistry Admin ID: 2s356mv337a\nAdmin Name: Andres Flores\nAdmin Organization: Noir Haus Media, inc.\nAdmin Street: 4106 N 22nd St #2\nAdmin Street: \nAdmin City: McAllen\nAdmin State/Province: TX\nAdmin Postal Code: 78504\nAdmin Country: US\nAdmin Phone: +281.2450277\nAdmin Phone Ext: \nAdmin Fax: \nAdmin Fax Ext: \nAdmin Email: sirEmmett@icloud.com\nRegistry Tech ID: 2s356jjywoa\nTech Name: Andres Flores\nTech Organization: Noir Haus Media, inc.\nTech Street: 4106 N 22nd St #2\nTech Street: \nTech City: McAllen\nTech State/Province: 
TX\nTech Postal Code: 78504\nTech Country: US\nTech Phone: +281.2450277\nTech Phone Ext: \nTech Fax: \nTech Fax Ext: \nTech Email: sirEmmett@icloud.com\nNameserver: ns-us.1and1-dns.com\nNameserver: ns-us.1and1-dns.de\nNameserver: ns-us.1and1-dns.org\nNameserver: ns-us.1and1-dns.us\nDNSSEC: Unsigned\nURL of the ICANN WHOIS Data Problem Reporting System: http://wdprs.internic.net/\n>>> Last update of WHOIS database: 2016-06-06T08:08:12Z <<<\n\nFor more information on Whois status codes, please visit https://icann.org/epp", "Domain Name: noir.haus\nDomain ID: ce0a53bd44534f7aa261a54f8817c7ca-RSIDE\nWHOIS Server: whois.schlund.info\nReferral URL: http://1and1.com\nUpdated Date: 2016-01-07T00:11:45Z\nCreation Date: 2014-11-23T00:11:16Z\nRegistry Expiry Date: 2016-11-23T00:11:16Z\nSponsoring Registrar: 1&1 Internet AG\nSponsoring Registrar IANA ID: 83\nDomain Status: clientTransferProhibited https://icann.org/epp#clientTransferProhibited\nRegistrant ID: 2s356hx7ryj\nRegistrant Name: Andres Flores\nRegistrant Organization: Noir Haus Media, inc.\nRegistrant Street: 4106 N 22nd St #2\nRegistrant City: McAllen\nRegistrant State/Province: TX\nRegistrant Postal Code: 78504\nRegistrant Country: US\nRegistrant Phone: +281.2450277\nRegistrant Phone Ext: \nRegistrant Fax: \nRegistrant Fax Ext: \nRegistrant Email: sirEmmett@icloud.com\nAdmin ID: 2s356mv337a\nAdmin Name: Andres Flores\nAdmin Organization: Noir Haus Media, inc.\nAdmin Street: 4106 N 22nd St #2\nAdmin City: McAllen\nAdmin State/Province: TX\nAdmin Postal Code: 78504\nAdmin Country: US\nAdmin Phone: +281.2450277\nAdmin Phone Ext: \nAdmin Fax: \nAdmin Fax Ext: \nAdmin Email: sirEmmett@icloud.com\nTech ID: 2s356jjywoa\nTech Name: Andres Flores\nTech Organization: Noir Haus Media, inc.\nTech Street: 4106 N 22nd St #2\nTech City: McAllen\nTech State/Province: TX\nTech Postal Code: 78504\nTech Country: US\nTech Phone: +281.2450277\nTech Phone Ext: \nTech Fax: \nTech Fax Ext: \nTech Email: sirEmmett@icloud.com\nName Server: 
ns-us.1and1-dns.org\nName Server: ns-us.1and1-dns.us\nName Server: ns-us.1and1-dns.com\nName Server: ns-us.1and1-dns.de\nDNSSEC: unsigned\n>>> Last update of WHOIS database: 2016-06-06T08:11:11Z <<<\n\nFor more information on Whois status codes, please visit https://icann.org/epp\n\nTerms of Use: Users accessing the United TLD WHOIS service agree to use the data only for lawful purposes, and under no circumstances may this data be used to: Allow, enable, or otherwise support the transmission by e-mail, telephone, or facsimile of mass unsolicited, commercial advertising or solicitations to entities other than the registrar's own existing customers. Enable high volume, automated, electronic processes that send queries or data to the systems of United TLD or any ICANN-accredited registrar, except as reasonably necessary to register domain names or modify existing registrations. When using the United TLD Whois service, please consider the following: The Whois service is not a replacement for standard EPP commands to the SRS service. Whois is not considered authoritative for registered domain objects. The Whois service may be scheduled for downtime during production or OT&E maintenance periods. Queries to the Whois services are throttled. If too many queries are received from a single IP address within a specified time, the service will begin to reject further queries for a period of time to prevent disruption of Whois service access. Abuse of the Whois system through data mining is mitigated by detecting and limiting bulk query access from single sources.\n\n"], "whois_server": ["whois.1and1.com"], "registrar": ["1&1 Internet SE"], "creation_date": ["2014-11-23T00:11:16"], "id": ["ce0a53bd44534f7aa261a54f8817c7ca-rside"]} \ No newline at end of file diff --git a/test_output.txt b/test_output.txt new file mode 100644 index 0000000..a45e3bf --- /dev/null +++ b/test_output.txt @@ -0,0 +1,447 @@ +[ 1/270] 0007games.com passed in normalized mode. 
+[ 2/270] 0007games.com passed in default mode. +[ 3/270] 0031fashion.com passed in normalized mode. +[ 4/270] 0031fashion.com passed in default mode. +[ 5/270] 123vitamine.com passed in normalized mode. +[ 6/270] 123vitamine.com passed in default mode. +[ 7/270] 2x4.ru passed in normalized mode. +[ 8/270] 2x4.ru passed in default mode. +[ 9/270] 365calendars.com passed in normalized mode. +[ 10/270] 365calendars.com passed in default mode. +[ 11/270] 9v.lt passed in normalized mode. +[ 12/270] 9v.lt passed in default mode. +[ 13/270] about.museum passed in normalized mode. +[ 14/270] about.museum passed in default mode. +[ 15/270] abouttubes.com passed in normalized mode. +[ 16/270] abouttubes.com passed in default mode. +[ 17/270] actu.org.au TEST CASE FAILED, ERRORS BELOW +Mode: normalized +======================================= +(root -> contacts -> tech) New key present in current data, but missing in previous data: `email` + [+++] visit whois.ausregistry.com.au for web based whois +(root -> contacts -> registrant) New key present in current data, but missing in previous data: `email` + [+++] visit whois.ausregistry.com.au for web based whois +======================================= +[ 18/270] actu.org.au TEST CASE FAILED, ERRORS BELOW +Mode: default +======================================= +(root -> contacts -> tech) New key present in current data, but missing in previous data: `email` + [+++] Visit whois.ausregistry.com.au for Web based WhoIs +(root -> contacts -> registrant) New key present in current data, but missing in previous data: `email` + [+++] Visit whois.ausregistry.com.au for Web based WhoIs +======================================= +[ 19/270] ajp.xyz passed in normalized mode. +[ 20/270] ajp.xyz passed in default mode. +[ 21/270] alibaba.jp passed in normalized mode. +[ 22/270] alibaba.jp passed in default mode. +[ 23/270] alliancefrançaise.nu passed in normalized mode. +[ 24/270] alliancefrançaise.nu passed in default mode. 
+[ 25/270] anink.com passed in normalized mode. +[ 26/270] anink.com passed in default mode. +[ 27/270] anonne.ws passed in normalized mode. +[ 28/270] anonne.ws passed in default mode. +[ 29/270] anonnews.org passed in normalized mode. +[ 30/270] anonnews.org passed in default mode. +[ 31/270] aol.com passed in normalized mode. +[ 32/270] aol.com passed in default mode. +[ 33/270] apple.ai passed in normalized mode. +[ 34/270] apple.ai passed in default mode. +[ 35/270] aridns.net.au TEST CASE FAILED, ERRORS BELOW +Mode: normalized +======================================= +(root -> contacts -> tech) New key present in current data, but missing in previous data: `email` + [+++] visit whois.ausregistry.com.au for web based whois +(root -> contacts -> registrant) New key present in current data, but missing in previous data: `email` + [+++] visit whois.ausregistry.com.au for web based whois +======================================= +[ 36/270] aridns.net.au TEST CASE FAILED, ERRORS BELOW +Mode: default +======================================= +(root -> contacts -> tech) New key present in current data, but missing in previous data: `email` + [+++] Visit whois.ausregistry.com.au for Web based WhoIs +(root -> contacts -> registrant) New key present in current data, but missing in previous data: `email` + [+++] Visit whois.ausregistry.com.au for Web based WhoIs +======================================= +[ 37/270] arkeysolutions.com passed in normalized mode. +[ 38/270] arkeysolutions.com passed in default mode. +[ 39/270] asiahotel.co.th passed in normalized mode. +[ 40/270] asiahotel.co.th passed in default mode. +[ 41/270] atheme.org passed in normalized mode. +[ 42/270] atheme.org passed in default mode. 
+[ 43/270] australia.gov.au TEST CASE FAILED, ERRORS BELOW +Mode: normalized +======================================= +(root -> contacts -> tech) New key present in current data, but missing in previous data: `email` + [+++] visit whois.ausregistry.com.au for web based whois +(root -> contacts -> registrant) New key present in current data, but missing in previous data: `email` + [+++] visit whois.ausregistry.com.au for web based whois +======================================= +[ 44/270] australia.gov.au TEST CASE FAILED, ERRORS BELOW +Mode: default +======================================= +(root -> contacts -> tech) New key present in current data, but missing in previous data: `email` + [+++] Visit whois.ausregistry.com.au for Web based WhoIs +(root -> contacts -> registrant) New key present in current data, but missing in previous data: `email` + [+++] Visit whois.ausregistry.com.au for Web based WhoIs +======================================= +[ 45/270] b.ro passed in normalized mode. +[ 46/270] b.ro passed in default mode. +[ 47/270] baligems.co.uk passed in normalized mode. +[ 48/270] baligems.co.uk passed in default mode. +[ 49/270] bidtheatre.com passed in normalized mode. +[ 50/270] bidtheatre.com passed in default mode. +[ 51/270] blackburn.ac.uk passed in normalized mode. +[ 52/270] blackburn.ac.uk passed in default mode. +[ 53/270] bristol.ac.uk passed in normalized mode. +[ 54/270] bristol.ac.uk passed in default mode. +[ 55/270] bts.co.th passed in normalized mode. +[ 56/270] bts.co.th passed in default mode. +[ 57/270] byme.at passed in normalized mode. +[ 58/270] byme.at passed in default mode. +[ 59/270] bäckerei.de passed in normalized mode. +[ 60/270] bäckerei.de passed in default mode. +[ 61/270] communigal.net passed in normalized mode. +[ 62/270] communigal.net passed in default mode. +[ 63/270] cryto.net passed in normalized mode. +[ 64/270] cryto.net passed in default mode. +[ 65/270] daemonrage.net passed in normalized mode. 
+[ 66/270] daemonrage.net passed in default mode. +[ 67/270] dailym.ai passed in normalized mode. +[ 68/270] dailym.ai passed in default mode. +[ 69/270] davicom.com.tw passed in normalized mode. +[ 70/270] davicom.com.tw passed in default mode. +[ 71/270] defunctkernel.me passed in normalized mode. +[ 72/270] defunctkernel.me passed in default mode. +[ 73/270] direct.gov.uk passed in normalized mode. +[ 74/270] direct.gov.uk passed in default mode. +[ 75/270] dns4pro.com passed in normalized mode. +[ 76/270] dns4pro.com passed in default mode. +[ 77/270] donuts.co passed in normalized mode. +[ 78/270] donuts.co passed in default mode. +[ 79/270] drpciv.biz passed in normalized mode. +[ 80/270] drpciv.biz passed in default mode. +[ 81/270] edis.at passed in normalized mode. +[ 82/270] edis.at passed in default mode. +[ 83/270] engine.com passed in normalized mode. +[ 84/270] engine.com passed in default mode. +[ 85/270] evalsed.info TEST CASE FAILED, ERRORS BELOW +Mode: normalized +======================================= +(root -> contacts) Data mismatch in key `billing`. + [old] None + [new] {u'city': u'Bruxelles', u'fax': u'+32.22355699', u'handle': u'Edns-b3324733', u'name': u'Dulieu Arnaud', u'country': u'Belgium', u'phone': u'+32.25374400', u'street': u'Globe village chaussee Alsemberg 1001', u'organization': u'Mostra s.a.', u'postal': u'1180', u'email': u'webadmin@mostra.com'} +======================================= +[ 86/270] evalsed.info TEST CASE FAILED, ERRORS BELOW +Mode: default +======================================= +(root -> contacts) Data mismatch in key `billing`. 
+ [old] None + [new] {u'city': u'Bruxelles', u'fax': u'+32.22355699', u'handle': u'Edns-b3324733', u'name': u'Dulieu Arnaud', u'country': u'BE', u'phone': u'+32.25374400', u'street': u'Globe village chaussee Alsemberg 1001', u'organization': u'Mostra s.a.', u'postal': u'1180', u'email': u'webadmin@mostra.com'} +======================================= +[ 87/270] example.com passed in normalized mode. +[ 88/270] example.com passed in default mode. +[ 89/270] expopack.com.mx passed in normalized mode. +[ 90/270] expopack.com.mx passed in default mode. +[ 91/270] f63.net passed in normalized mode. +[ 92/270] f63.net passed in default mode. +[ 93/270] formule1fo.com passed in normalized mode. +[ 94/270] formule1fo.com passed in default mode. +[ 95/270] foxiepa.ws passed in normalized mode. +[ 96/270] foxiepa.ws passed in default mode. +[ 97/270] geko.dk passed in normalized mode. +[ 98/270] geko.dk passed in default mode. +[ 99/270] get.moe passed in normalized mode. +[100/270] get.moe passed in default mode. +[101/270] globallatedeals.com passed in normalized mode. +[102/270] globallatedeals.com passed in default mode. +[103/270] globaltravelgroup.com passed in normalized mode. +[104/270] globaltravelgroup.com passed in default mode. +[105/270] google.cn passed in normalized mode. +[106/270] google.cn passed in default mode. +[107/270] google.co.jp passed in normalized mode. +[108/270] google.co.jp passed in default mode. +[109/270] google.co.th passed in normalized mode. +[110/270] google.co.th passed in default mode. +[111/270] google.co.uk passed in normalized mode. +[112/270] google.co.uk passed in default mode. +[113/270] google.com passed in normalized mode. +[114/270] google.com passed in default mode. +[115/270] google.com.tw passed in normalized mode. +[116/270] google.com.tw passed in default mode. +[117/270] google.durban passed in normalized mode. +[118/270] google.durban passed in default mode. +[119/270] google.eu passed in normalized mode. 
+[120/270] google.eu passed in default mode. +[121/270] google.it passed in normalized mode. +[122/270] google.it passed in default mode. +[123/270] google.luxury passed in normalized mode. +[124/270] google.luxury passed in default mode. +[125/270] hl3.eu passed in normalized mode. +[126/270] hl3.eu passed in default mode. +[127/270] hopjb.eu passed in normalized mode. +[128/270] hopjb.eu passed in default mode. +[129/270] huskeh.net passed in normalized mode. +[130/270] huskeh.net passed in default mode. +[131/270] hyves.nl TEST CASE FAILED, ERRORS BELOW +Mode: normalized +======================================= +(root) List mismatch in key `registrar`. + [old] set([u'Domeinbalie.nl', u'NL Domain Registry']) + [new] set([u'Domeinbalie.nl']) +======================================= +[132/270] hyves.nl TEST CASE FAILED, ERRORS BELOW +Mode: default +======================================= +(root) List mismatch in key `registrar`. + [old] set([u'Domeinbalie.nl', u'NL Domain Registry']) + [new] set([u'Domeinbalie.nl']) +======================================= +[133/270] imperial.ac.uk passed in normalized mode. +[134/270] imperial.ac.uk passed in default mode. +[135/270] ireland.ie passed in normalized mode. +[136/270] ireland.ie passed in default mode. +[137/270] ismtgoxdeadyet.com passed in normalized mode. +[138/270] ismtgoxdeadyet.com passed in default mode. +[139/270] jizzbo.com passed in normalized mode. +[140/270] jizzbo.com passed in default mode. +[141/270] keybase.io passed in normalized mode. +[142/270] keybase.io passed in default mode. 
+[143/270] linux.conf.au TEST CASE FAILED, ERRORS BELOW +Mode: normalized +======================================= +(root -> contacts -> tech) New key present in current data, but missing in previous data: `email` + [+++] visit whois.ausregistry.com.au for web based whois +(root -> contacts -> registrant) New key present in current data, but missing in previous data: `email` + [+++] visit whois.ausregistry.com.au for web based whois +======================================= +[144/270] linux.conf.au TEST CASE FAILED, ERRORS BELOW +Mode: default +======================================= +(root -> contacts -> tech) New key present in current data, but missing in previous data: `email` + [+++] Visit whois.ausregistry.com.au for Web based WhoIs +(root -> contacts -> registrant) New key present in current data, but missing in previous data: `email` + [+++] Visit whois.ausregistry.com.au for Web based WhoIs +======================================= +[145/270] lowendbox.com passed in normalized mode. +[146/270] lowendbox.com passed in default mode. +[147/270] lowendshare.com passed in normalized mode. +[148/270] lowendshare.com passed in default mode. 
+[149/270] luka-netconsult.com TEST CASE FAILED, ERRORS BELOW +Mode: normalized +======================================= +(root -> contacts -> admin) New key present in current data, but missing in previous data: `fax` + [+++] +496948000570 +(root -> contacts -> tech) New key present in current data, but missing in previous data: `fax` + [+++] +496948000570 +======================================= +[150/270] luka-netconsult.com TEST CASE FAILED, ERRORS BELOW +Mode: default +======================================= +(root -> contacts -> admin) New key present in current data, but missing in previous data: `fax` + [+++] +496948000570 +(root -> contacts -> tech) New key present in current data, but missing in previous data: `fax` + [+++] +496948000570 +======================================= +[151/270] microsoft.com passed in normalized mode. +[152/270] microsoft.com passed in default mode. +[153/270] mu.oz.au passed in normalized mode. +[154/270] mu.oz.au passed in default mode. +[155/270] nepasituation.com passed in normalized mode. +[156/270] nepasituation.com passed in default mode. +[157/270] nic.buzz passed in normalized mode. +[158/270] nic.buzz passed in default mode. +[159/270] nic.ir passed in normalized mode. +[160/270] nic.ir passed in default mode. +[161/270] nic.ps passed in normalized mode. +[162/270] nic.ps passed in default mode. +[163/270] nic.pw passed in normalized mode. +[164/270] nic.pw passed in default mode. +[165/270] nic.ru passed in normalized mode. +[166/270] nic.ru passed in default mode. +[167/270] noir.haus passed in normalized mode. +[168/270] noir.haus passed in default mode. +[169/270] nominet.org.uk passed in normalized mode. +[170/270] nominet.org.uk passed in default mode. +[171/270] nsa.gov passed in normalized mode. +[172/270] nsa.gov passed in default mode. +[173/270] nttpc.co.jp passed in normalized mode. +[174/270] nttpc.co.jp passed in default mode. +[175/270] nytimes.com passed in normalized mode. 
+[176/270] nytimes.com passed in default mode. +[177/270] oli.id.au TEST CASE FAILED, ERRORS BELOW +Mode: normalized +======================================= +(root -> contacts -> tech) New key present in current data, but missing in previous data: `email` + [+++] visit whois.ausregistry.com.au for web based whois +(root -> contacts -> registrant) New key present in current data, but missing in previous data: `email` + [+++] visit whois.ausregistry.com.au for web based whois +======================================= +[178/270] oli.id.au TEST CASE FAILED, ERRORS BELOW +Mode: default +======================================= +(root -> contacts -> tech) New key present in current data, but missing in previous data: `email` + [+++] Visit whois.ausregistry.com.au for Web based WhoIs +(root -> contacts -> registrant) New key present in current data, but missing in previous data: `email` + [+++] Visit whois.ausregistry.com.au for Web based WhoIs +======================================= +[179/270] ovh.fr passed in normalized mode. +[180/270] ovh.fr passed in default mode. +[181/270] pcmups.com.tw passed in normalized mode. +[182/270] pcmups.com.tw passed in default mode. +[183/270] pixelmania.asia passed in normalized mode. +[184/270] pixelmania.asia passed in default mode. +[185/270] porn.com.tw passed in normalized mode. +[186/270] porn.com.tw passed in default mode. +[187/270] prq.se passed in normalized mode. +[188/270] prq.se passed in default mode. +[189/270] quadranet.com passed in normalized mode. +[190/270] quadranet.com passed in default mode. +[191/270] realtek.com.tw passed in normalized mode. +[192/270] realtek.com.tw passed in default mode. +[193/270] redd.it passed in normalized mode. +[194/270] redd.it passed in default mode. +[195/270] ricoh.co.th passed in normalized mode. +[196/270] ricoh.co.th passed in default mode. +[197/270] rs.co.th passed in normalized mode. +[198/270] rs.co.th passed in default mode. 
+[199/270] servequake.com passed in normalized mode. +[200/270] servequake.com passed in default mode. +[201/270] siamparagon.co.th passed in normalized mode. +[202/270] siamparagon.co.th passed in default mode. +[203/270] simpardaz.com passed in normalized mode. +[204/270] simpardaz.com passed in default mode. +[205/270] sina.com.cn passed in normalized mode. +[206/270] sina.com.cn passed in default mode. +[207/270] singularity.fr passed in normalized mode. +[208/270] singularity.fr passed in default mode. +[209/270] starasaservice.com passed in normalized mode. +[210/270] starasaservice.com passed in default mode. +[211/270] starbucks.co.th passed in normalized mode. +[212/270] starbucks.co.th passed in default mode. +[213/270] swisscom.ch passed in normalized mode. +[214/270] swisscom.ch passed in default mode. +[215/270] sydney.edu.au TEST CASE FAILED, ERRORS BELOW +Mode: normalized +======================================= +(root -> contacts -> tech) New key present in current data, but missing in previous data: `email` + [+++] visit whois.ausregistry.com.au for web based whois +(root -> contacts -> registrant) New key present in current data, but missing in previous data: `email` + [+++] visit whois.ausregistry.com.au for web based whois +======================================= +[216/270] sydney.edu.au TEST CASE FAILED, ERRORS BELOW +Mode: default +======================================= +(root -> contacts -> tech) New key present in current data, but missing in previous data: `email` + [+++] Visit whois.ausregistry.com.au for Web based WhoIs +(root -> contacts -> registrant) New key present in current data, but missing in previous data: `email` + [+++] Visit whois.ausregistry.com.au for Web based WhoIs +======================================= +[217/270] tattitude.co.uk passed in normalized mode. +[218/270] tattitude.co.uk passed in default mode. +[219/270] test.de passed in normalized mode. +[220/270] test.de passed in default mode. 
+[221/270] textfiles.com passed in normalized mode. +[222/270] textfiles.com passed in default mode. +[223/270] the.ai passed in normalized mode. +[224/270] the.ai passed in default mode. +[225/270] theregister.com passed in normalized mode. +[226/270] theregister.com passed in default mode. +[227/270] tip.it passed in normalized mode. +[228/270] tip.it passed in default mode. +[229/270] toyota.co.th passed in normalized mode. +[230/270] toyota.co.th passed in default mode. +[231/270] twitter.com passed in normalized mode. +[232/270] twitter.com passed in default mode. +[233/270] ufpa.br passed in normalized mode. +[234/270] ufpa.br passed in default mode. +[235/270] unwire.hk passed in normalized mode. +[236/270] unwire.hk passed in default mode. +[237/270] urlte.am passed in normalized mode. +[238/270] urlte.am passed in default mode. +[239/270] via.com.tw passed in normalized mode. +[240/270] via.com.tw passed in default mode. +[241/270] vulnweb.com passed in normalized mode. +[242/270] vulnweb.com passed in default mode. 
+[243/270] wa.us TEST CASE FAILED, ERRORS BELOW +Mode: normalized +======================================= +(root -> contacts -> admin) Key present in previous data, but missing in current data: `facsimile` + [---] +1.5714345758 +(root -> contacts -> admin) New key present in current data, but missing in previous data: `fax` + [+++] +1.5714345758 +(root -> contacts -> tech) Key present in previous data, but missing in current data: `facsimile` + [---] +1.5714345758 +(root -> contacts -> tech) New key present in current data, but missing in previous data: `fax` + [+++] +1.5714345758 +(root -> contacts -> registrant) Key present in previous data, but missing in current data: `facsimile` + [---] +1.5714345758 +(root -> contacts -> registrant) New key present in current data, but missing in previous data: `fax` + [+++] +1.5714345758 +(root -> contacts -> billing) Key present in previous data, but missing in current data: `facsimile` + [---] +1.5714345758 +(root -> contacts -> billing) New key present in current data, but missing in previous data: `fax` + [+++] +1.5714345758 +======================================= +[244/270] wa.us TEST CASE FAILED, ERRORS BELOW +Mode: default +======================================= +(root -> contacts -> admin) Key present in previous data, but missing in current data: `facsimile` + [---] +1.5714345758 +(root -> contacts -> admin) New key present in current data, but missing in previous data: `fax` + [+++] +1.5714345758 +(root -> contacts -> tech) Key present in previous data, but missing in current data: `facsimile` + [---] +1.5714345758 +(root -> contacts -> tech) New key present in current data, but missing in previous data: `fax` + [+++] +1.5714345758 +(root -> contacts -> registrant) Key present in previous data, but missing in current data: `facsimile` + [---] +1.5714345758 +(root -> contacts -> registrant) New key present in current data, but missing in previous data: `fax` + [+++] +1.5714345758 +(root -> contacts -> billing) 
Key present in previous data, but missing in current data: `facsimile` + [---] +1.5714345758 +(root -> contacts -> billing) New key present in current data, but missing in previous data: `fax` + [+++] +1.5714345758 +======================================= +[245/270] warwick.ac.uk passed in normalized mode. +[246/270] warwick.ac.uk passed in default mode. +[247/270] whirlpool.net.au TEST CASE FAILED, ERRORS BELOW +Mode: normalized +======================================= +(root -> contacts -> tech) New key present in current data, but missing in previous data: `email` + [+++] visit whois.ausregistry.com.au for web based whois +(root -> contacts -> registrant) New key present in current data, but missing in previous data: `email` + [+++] visit whois.ausregistry.com.au for web based whois +======================================= +[248/270] whirlpool.net.au TEST CASE FAILED, ERRORS BELOW +Mode: default +======================================= +(root -> contacts -> tech) New key present in current data, but missing in previous data: `email` + [+++] Visit whois.ausregistry.com.au for Web based WhoIs +(root -> contacts -> registrant) New key present in current data, but missing in previous data: `email` + [+++] Visit whois.ausregistry.com.au for Web based WhoIs +======================================= +[249/270] whois.com passed in normalized mode. +[250/270] whois.com passed in default mode. +[251/270] whois.us passed in normalized mode. +[252/270] whois.us passed in default mode. +[253/270] whoiser.ir passed in normalized mode. +[254/270] whoiser.ir passed in default mode. +[255/270] winamp.com passed in normalized mode. +[256/270] winamp.com passed in default mode. +[257/270] wit.ai passed in normalized mode. +[258/270] wit.ai passed in default mode. +[259/270] wosoccer.com passed in normalized mode. +[260/270] wosoccer.com passed in default mode. +[261/270] x.it passed in normalized mode. +[262/270] x.it passed in default mode. 
+[263/270] xboxmoments.com passed in normalized mode. +[264/270] xboxmoments.com passed in default mode. +[265/270] yahoo.com.tw passed in normalized mode. +[266/270] yahoo.com.tw passed in default mode. +[267/270] yahoo.it passed in normalized mode. +[268/270] yahoo.it passed in default mode. +[269/270] zem.org.uk passed in normalized mode. +[270/270] zem.org.uk passed in default mode. +Timing in default mode: 16ms avg, 0ms min, 59ms max +Timing in normalized mode: 23ms avg, 0ms min, 71ms max +22 tests failed, 52 errors in total. \ No newline at end of file From 13c0a4af1df1fc4cbab83c6f307a7e9c814f198d Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Wed, 8 Jun 2016 17:16:58 +0200 Subject: [PATCH 03/40] ADD: Caching for WHOIS servers. Only the top level, referred servers are not cached because it would be impossible to know which domains belong to which server FIX: Defaulted normalizing to False, since it seems to be broken. --- pwhois | 5 ++--- pythonwhois/__init__.py | 16 ++++++++++------ pythonwhois/net.py | 19 +++++++++++++++---- pythonwhois/parse.py | 1 + pythonwhois/whois_server_cache.py | 28 ++++++++++++++++++++++++++++ 5 files changed, 56 insertions(+), 13 deletions(-) create mode 100644 pythonwhois/whois_server_cache.py diff --git a/pwhois b/pwhois index 8f92fd3..1e9e5fc 100755 --- a/pwhois +++ b/pwhois @@ -42,11 +42,10 @@ if args.raw == True: print("\n--\n".join([x.encode("utf-8") for x in data])) else: if len(server_list) > 0: - parsed = pythonwhois.parse.parse_raw_whois(data, normalized=True, never_query_handles=False, + parsed = pythonwhois.parse.parse_raw_whois(data, normalized=False, never_query_handles=False, handle_server=server_list[-1]) else: - parsed = pythonwhois.parse.parse_raw_whois(data, normalized=True) - + parsed = pythonwhois.parse.parse_raw_whois(data, normalized=False) if args.json == True: print(json.dumps(parsed, default=json_fallback)) else: diff --git a/pythonwhois/__init__.py b/pythonwhois/__init__.py index 
47639c3..dea44ef 100644 --- a/pythonwhois/__init__.py +++ b/pythonwhois/__init__.py @@ -1,11 +1,15 @@ from . import net, parse + def get_whois(domain, normalized=[]): - raw_data, server_list = net.get_whois_raw(domain, with_server_list=True) - # Unlisted handles will be looked up on the last WHOIS server that was queried. This may be changed to also query - # other servers in the future, if it turns out that there are cases where the last WHOIS server in the chain doesn't - # actually hold the handle contact details, but another WHOIS server in the chain does. - return parse.parse_raw_whois(raw_data, normalized=normalized, never_query_handles=False, handle_server=server_list[-1]) + raw_data, server_list = net.get_whois_raw(domain, with_server_list=True) + # Unlisted handles will be looked up on the last WHOIS server that was queried. This may be changed to also query + # other servers in the future, if it turns out that there are cases where the last WHOIS server in the chain doesn't + # actually hold the handle contact details, but another WHOIS server in the chain does. + return parse.parse_raw_whois(raw_data, normalized=normalized, never_query_handles=False, + handle_server=server_list[-1]) + def whois(*args, **kwargs): - raise Exception("The whois() method has been replaced by a different method (with a different API), since pythonwhois 2.0. Either install the older pythonwhois 1.2.3, or change your code to use the new API.") + raise Exception( + "The whois() method has been replaced by a different method (with a different API), since pythonwhois 2.0. Either install the older pythonwhois 1.2.3, or change your code to use the new API.") diff --git a/pythonwhois/net.py b/pythonwhois/net.py index 4d7a2d0..fa47ebb 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -5,8 +5,11 @@ import sys from codecs import encode, decode +from pythonwhois.whois_server_cache import WhoisServerCache from . 
import shared +server_cache = WhoisServerCache() + def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=False, with_server_list=False, server_list=None): @@ -50,7 +53,13 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals target_server = exc_serv break if is_exception == False: - target_server = get_root_server(domain) + tld = get_tld(domain) + cached_server = server_cache.getServer(tld) + if cached_server is not None: + target_server = cached_server + else: + target_server = get_root_server(domain) + server_cache.putServer(tld, target_server) else: target_server = server if target_server == "whois.jprs.jp": @@ -104,9 +113,11 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals def server_is_alive(server): response = subprocess.call(["ping", "-c 1", "-w2", server], stdout=open(os.devnull, "w"), stderr=subprocess.STDOUT) - if response != 0: - return False - return True + return response == 0 + + +def get_tld(domain): + return domain.split(".")[-1] def get_root_server(domain): diff --git a/pythonwhois/parse.py b/pythonwhois/parse.py index 8341257..0013020 100644 --- a/pythonwhois/parse.py +++ b/pythonwhois/parse.py @@ -1219,6 +1219,7 @@ def is_known_abbreviation(word): def has_country(line, country): + print(line) return country in line.lower() diff --git a/pythonwhois/whois_server_cache.py b/pythonwhois/whois_server_cache.py new file mode 100644 index 0000000..ed1d55a --- /dev/null +++ b/pythonwhois/whois_server_cache.py @@ -0,0 +1,28 @@ +import ast +import os + +cache_file_name = "pythonwhois/whois_server_cache.dat" + + +def read_cache(): + if os.path.isfile(cache_file_name): + return ast.literal_eval(open(cache_file_name).read()) + + return {} + + +def write_cache(cache): + cache_file = open(cache_file_name, 'w') + cache_file.write(str(cache)) + + +class WhoisServerCache: + def __init__(self): + self.cache = read_cache() + + def getServer(self, tld): + return self.cache.get(tld) 
+ + def putServer(self, tld, whois_server): + self.cache[tld] = whois_server + write_cache(self.cache) From e103b98c0d322db22f16fbcfab3e11eac4cf9e6b Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Thu, 9 Jun 2016 10:37:59 +0200 Subject: [PATCH 04/40] FIX: Single instance caching --- pythonwhois/caching/__init__.py | 0 pythonwhois/caching/whois_server_cache.dat | 1 + pythonwhois/caching/whois_server_cache.py | 48 ++++++++++++++++++++++ pythonwhois/net.py | 31 +++++++------- pythonwhois/whois_server_cache.py | 28 ------------- 5 files changed, 65 insertions(+), 43 deletions(-) create mode 100644 pythonwhois/caching/__init__.py create mode 100644 pythonwhois/caching/whois_server_cache.dat create mode 100644 pythonwhois/caching/whois_server_cache.py delete mode 100644 pythonwhois/whois_server_cache.py diff --git a/pythonwhois/caching/__init__.py b/pythonwhois/caching/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pythonwhois/caching/whois_server_cache.dat b/pythonwhois/caching/whois_server_cache.dat new file mode 100644 index 0000000..2b22b47 --- /dev/null +++ b/pythonwhois/caching/whois_server_cache.dat @@ -0,0 +1 @@ +{'forex': u'whois.nic.forex', 'haus': u'whois.unitedtld.com', 'eu': u'whois.eu', 'luxury': 'whois.nic.luxury', 'test': 'whois.nic.test', 'com': u'whois.verisign-grs.com'} \ No newline at end of file diff --git a/pythonwhois/caching/whois_server_cache.py b/pythonwhois/caching/whois_server_cache.py new file mode 100644 index 0000000..113bf90 --- /dev/null +++ b/pythonwhois/caching/whois_server_cache.py @@ -0,0 +1,48 @@ +import ast +import os + +cache_file_name = "pythonwhois/caching/whois_server_cache.dat" + + +def read_cache(): + if os.path.isfile(cache_file_name): + return ast.literal_eval(open(cache_file_name).read()) + + return {} + + +def write_cache(cache): + cache_file = open(cache_file_name, 'w') + cache_file.write(str(cache)) + + +class WhoisServerCache: + """ + Cache handler for ease of use. 
Do not instantiate. import server_cache instead. + Otherwise an inconsistent cache can happen as a result of multiple caches. + """ + + def __init__(self): + self.cache = read_cache() + + def get_server(self, tld): + """ + Get a WHOIS server for a given TLD + :param tld: The TLD to get the WHOIS server for + :return: The WHOIS server if it is known, or None otherwise + """ + return self.cache.get(tld) + + def put_server(self, tld, whois_server): + """ + Store a new WHOIS server in the cache. The cache is then also + written to disk again. Because the WHOIS servers don't change that often, + it simply writes to a file. + :param tld: The TLD to store a WHOIS server for + :param whois_server: The WHOIS server to store + """ + self.cache[tld] = whois_server + write_cache(self.cache) + + +server_cache = WhoisServerCache() diff --git a/pythonwhois/net.py b/pythonwhois/net.py index fa47ebb..a7e515b 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -5,11 +5,9 @@ import sys from codecs import encode, decode -from pythonwhois.whois_server_cache import WhoisServerCache +from pythonwhois.caching.whois_server_cache import server_cache from . 
import shared -server_cache = WhoisServerCache() - def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=False, with_server_list=False, server_list=None): @@ -54,12 +52,12 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals break if is_exception == False: tld = get_tld(domain) - cached_server = server_cache.getServer(tld) + cached_server = server_cache.get_server(tld) if cached_server is not None: target_server = cached_server else: target_server = get_root_server(domain) - server_cache.putServer(tld, target_server) + server_cache.put_server(tld, target_server) else: target_server = server if target_server == "whois.jprs.jp": @@ -131,13 +129,16 @@ def get_root_server(domain): def whois_request(domain, server, port=43): - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.connect((server, port)) - sock.send(("%s\r\n" % domain).encode("utf-8")) - buff = b"" - while True: - data = sock.recv(1024) - if len(data) == 0: - break - buff += data - return buff.decode("utf-8", "replace") + try: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect((server, port)) + sock.send(("%s\r\n" % domain).encode("utf-8")) + buff = b"" + while True: + data = sock.recv(1024) + if len(data) == 0: + break + buff += data + return buff.decode("utf-8", "replace") + except Exception: + return "" diff --git a/pythonwhois/whois_server_cache.py b/pythonwhois/whois_server_cache.py deleted file mode 100644 index ed1d55a..0000000 --- a/pythonwhois/whois_server_cache.py +++ /dev/null @@ -1,28 +0,0 @@ -import ast -import os - -cache_file_name = "pythonwhois/whois_server_cache.dat" - - -def read_cache(): - if os.path.isfile(cache_file_name): - return ast.literal_eval(open(cache_file_name).read()) - - return {} - - -def write_cache(cache): - cache_file = open(cache_file_name, 'w') - cache_file.write(str(cache)) - - -class WhoisServerCache: - def __init__(self): - self.cache = read_cache() - - def getServer(self, tld): 
- return self.cache.get(tld) - - def putServer(self, tld, whois_server): - self.cache[tld] = whois_server - write_cache(self.cache) From 2476e716a9e53fd736067f5e66945816362f2f4d Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Thu, 9 Jun 2016 10:58:53 +0200 Subject: [PATCH 05/40] FIX: Encoding issues. Normalizing is back on. --- pwhois | 4 ++-- pythonwhois/caching/whois_server_cache.dat | 2 +- pythonwhois/parse.py | 5 ++--- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pwhois b/pwhois index 1e9e5fc..d4074e3 100755 --- a/pwhois +++ b/pwhois @@ -42,10 +42,10 @@ if args.raw == True: print("\n--\n".join([x.encode("utf-8") for x in data])) else: if len(server_list) > 0: - parsed = pythonwhois.parse.parse_raw_whois(data, normalized=False, never_query_handles=False, + parsed = pythonwhois.parse.parse_raw_whois(data, normalized=True, never_query_handles=False, handle_server=server_list[-1]) else: - parsed = pythonwhois.parse.parse_raw_whois(data, normalized=False) + parsed = pythonwhois.parse.parse_raw_whois(data, normalized=True) if args.json == True: print(json.dumps(parsed, default=json_fallback)) else: diff --git a/pythonwhois/caching/whois_server_cache.dat b/pythonwhois/caching/whois_server_cache.dat index 2b22b47..a5607df 100644 --- a/pythonwhois/caching/whois_server_cache.dat +++ b/pythonwhois/caching/whois_server_cache.dat @@ -1 +1 @@ -{'forex': u'whois.nic.forex', 'haus': u'whois.unitedtld.com', 'eu': u'whois.eu', 'luxury': 'whois.nic.luxury', 'test': 'whois.nic.test', 'com': u'whois.verisign-grs.com'} \ No newline at end of file +{'forex': u'whois.nic.forex', 'bo': u'whois.nic.bo', 'haus': u'whois.unitedtld.com', 'eu': u'whois.eu', 'barclays': u'whois.nic.barclays', 'luxury': 'whois.nic.luxury', 'test': 'whois.nic.test', 'com': u'whois.verisign-grs.com'} \ No newline at end of file diff --git a/pythonwhois/parse.py b/pythonwhois/parse.py index 0013020..025211c 100644 --- a/pythonwhois/parse.py +++ b/pythonwhois/parse.py @@ 
-698,7 +698,7 @@ def is_string(data): def filter_characters(string, delete_characters): - return ''.join([char for char in string if char not in delete_characters]) + return ''.join([char for char in string.encode('utf-8') if char not in delete_characters]) def parse_raw_whois(raw_data, normalized=None, never_query_handles=True, handle_server=""): @@ -1219,8 +1219,7 @@ def is_known_abbreviation(word): def has_country(line, country): - print(line) - return country in line.lower() + return country in line.encode('utf-8').lower() def has_incorrect_known_abbreviation(line): From b8fc96b352b81ffcedeb2c5b2b808a49ba78558a Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Thu, 9 Jun 2016 11:02:07 +0200 Subject: [PATCH 06/40] REF: Changed README.md --- README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2ce1b11..7b1a6e1 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,9 @@ -pythonwhois -=========== +whois-oracle, forked from pythonwhois +===================================== -A WHOIS retrieval and parsing library for Python. +Because it is all knowing! +A WHOIS retrieval and parsing library for Python, forked from pythonwhois +and updated by me. ## Dependencies From 9be54dc63bfa4874c0d1fe546c69df7e52042b13 Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Thu, 9 Jun 2016 11:29:01 +0200 Subject: [PATCH 07/40] REF: Renamed some stuff to not confuse this package with pythonwhois. Still compatible though. 
--- pythonwhois/caching/whois_server_cache.dat | 1 - pythonwhois/caching/whois_server_cache.py | 2 +- setup.py | 18 ++++++++---------- pwhois => whois-oracle | 0 4 files changed, 9 insertions(+), 12 deletions(-) delete mode 100644 pythonwhois/caching/whois_server_cache.dat rename pwhois => whois-oracle (100%) diff --git a/pythonwhois/caching/whois_server_cache.dat b/pythonwhois/caching/whois_server_cache.dat deleted file mode 100644 index a5607df..0000000 --- a/pythonwhois/caching/whois_server_cache.dat +++ /dev/null @@ -1 +0,0 @@ -{'forex': u'whois.nic.forex', 'bo': u'whois.nic.bo', 'haus': u'whois.unitedtld.com', 'eu': u'whois.eu', 'barclays': u'whois.nic.barclays', 'luxury': 'whois.nic.luxury', 'test': 'whois.nic.test', 'com': u'whois.verisign-grs.com'} \ No newline at end of file diff --git a/pythonwhois/caching/whois_server_cache.py b/pythonwhois/caching/whois_server_cache.py index 113bf90..a4cf712 100644 --- a/pythonwhois/caching/whois_server_cache.py +++ b/pythonwhois/caching/whois_server_cache.py @@ -1,7 +1,7 @@ import ast import os -cache_file_name = "pythonwhois/caching/whois_server_cache.dat" +cache_file_name = "pythonwhois/caching/whois_server.cache" def read_cache(): diff --git a/setup.py b/setup.py index 7cc819a..6ee1390 100644 --- a/setup.py +++ b/setup.py @@ -1,16 +1,14 @@ from setuptools import setup -setup(name='pythonwhois', - version='2.4.3', +setup(name='whois-oracle', + version='1.0', description='Module for retrieving and parsing the WHOIS data for a domain. Supports most domains. 
No dependencies.', - author='Sven Slootweg', - author_email='pythonwhois@cryto.net', - url='http://cryto.net/pythonwhois', - packages=['pythonwhois'], - package_dir={"pythonwhois":"pythonwhois"}, - package_data={"pythonwhois":["*.dat"]}, + author='Sander ten Hoor, original by Sven Slootweg', + url='https://github.com/MasterFenrir/whois-oracle', + packages=['pythonwhois', 'pythonwhois.caching'], + package_data={"pythonwhois": ["*.dat"]}, install_requires=['argparse'], provides=['pythonwhois'], - scripts=["pwhois"], + scripts=["whois-oracle"], license="WTFPL" - ) + ) diff --git a/pwhois b/whois-oracle similarity index 100% rename from pwhois rename to whois-oracle From 5b99c51d1d0cac58ab6ba4f28d51dca5799ec8da Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Thu, 9 Jun 2016 12:01:40 +0200 Subject: [PATCH 08/40] FIX: Cache is now saved in your home folder, OS independent. --- pythonwhois/__init__.py | 7 +------ pythonwhois/caching/whois_server_cache.py | 13 +++++++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pythonwhois/__init__.py b/pythonwhois/__init__.py index dea44ef..bf0c635 100644 --- a/pythonwhois/__init__.py +++ b/pythonwhois/__init__.py @@ -7,9 +7,4 @@ def get_whois(domain, normalized=[]): # other servers in the future, if it turns out that there are cases where the last WHOIS server in the chain doesn't # actually hold the handle contact details, but another WHOIS server in the chain does. return parse.parse_raw_whois(raw_data, normalized=normalized, never_query_handles=False, - handle_server=server_list[-1]) - - -def whois(*args, **kwargs): - raise Exception( - "The whois() method has been replaced by a different method (with a different API), since pythonwhois 2.0. 
Either install the older pythonwhois 1.2.3, or change your code to use the new API.") + handle_server=server_list[-1]) \ No newline at end of file diff --git a/pythonwhois/caching/whois_server_cache.py b/pythonwhois/caching/whois_server_cache.py index a4cf712..0f2cf89 100644 --- a/pythonwhois/caching/whois_server_cache.py +++ b/pythonwhois/caching/whois_server_cache.py @@ -1,14 +1,19 @@ import ast import os -cache_file_name = "pythonwhois/caching/whois_server.cache" +from os.path import expanduser + +home = expanduser("~") + +cache_file_name = home + "/.whois-oracle/whois_server.cache" def read_cache(): - if os.path.isfile(cache_file_name): + if os.path.exists(os.path.dirname(cache_file_name)): return ast.literal_eval(open(cache_file_name).read()) - - return {} + else: + os.makedirs(os.path.dirname(cache_file_name)) + return {} def write_cache(cache): From 40691778277630c8faa98f6de93723601ae0afcc Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Thu, 9 Jun 2016 15:22:21 +0200 Subject: [PATCH 09/40] REF: No more forced caching, can now be set manually by giving the cache a persistent storage path --- pythonwhois/caching/whois_server_cache.py | 41 +++++++++++++---------- setup.py | 2 +- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/pythonwhois/caching/whois_server_cache.py b/pythonwhois/caching/whois_server_cache.py index 0f2cf89..6193c1b 100644 --- a/pythonwhois/caching/whois_server_cache.py +++ b/pythonwhois/caching/whois_server_cache.py @@ -1,23 +1,18 @@ import ast import os -from os.path import expanduser -home = expanduser("~") - -cache_file_name = home + "/.whois-oracle/whois_server.cache" - - -def read_cache(): - if os.path.exists(os.path.dirname(cache_file_name)): - return ast.literal_eval(open(cache_file_name).read()) +def read_cache(file_path): + if os.path.isfile(file_path): + return ast.literal_eval(open(file_path).read()) else: - os.makedirs(os.path.dirname(cache_file_name)) + if os.path.dirname(file_path): + 
os.makedirs(os.path.dirname(file_path)) return {} -def write_cache(cache): - cache_file = open(cache_file_name, 'w') +def write_cache(cache, file_path): + cache_file = open(file_path, 'w') cache_file.write(str(cache)) @@ -28,7 +23,9 @@ class WhoisServerCache: """ def __init__(self): - self.cache = read_cache() + self.cache = {} + self.persistent = False + self.file_path = None def get_server(self, tld): """ @@ -40,14 +37,24 @@ def get_server(self, tld): def put_server(self, tld, whois_server): """ - Store a new WHOIS server in the cache. The cache is then also - written to disk again. Because the WHOIS servers don't change that often, - it simply writes to a file. + Store a new WHOIS server in the cache. If the cache is persistent, + it is also written to disk again. Because the WHOIS servers + don't change that often, it simply writes to a file. :param tld: The TLD to store a WHOIS server for :param whois_server: The WHOIS server to store """ self.cache[tld] = whois_server - write_cache(self.cache) + if self.persistent: + write_cache(self.cache, self.file_path) + + def set_persistent_location(self, file_path): + """ + Store the cache in a persistent location + :param file_path: The path to store the cache + """ + self.file_path = file_path + self.cache = read_cache(file_path) + self.persistent = True server_cache = WhoisServerCache() diff --git a/setup.py b/setup.py index 6ee1390..19fa7ff 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup setup(name='whois-oracle', - version='1.0', + version='1.0.2', description='Module for retrieving and parsing the WHOIS data for a domain. Supports most domains. No dependencies.', author='Sander ten Hoor, original by Sven Slootweg', url='https://github.com/MasterFenrir/whois-oracle', From e9201d699bf2068088e537f5ad0fb067d53f70d9 Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Thu, 9 Jun 2016 16:46:48 +0200 Subject: [PATCH 10/40] ADD: Cooldown capabilities. 
--- pythonwhois/caching/whois_server_cache.py | 3 +- pythonwhois/net.py | 11 +++++++- pythonwhois/ratelimit/__init__.py | 0 pythonwhois/ratelimit/cool_down.py | 34 +++++++++++++++++++++++ setup.py | 2 +- 5 files changed, 46 insertions(+), 4 deletions(-) create mode 100644 pythonwhois/ratelimit/__init__.py create mode 100644 pythonwhois/ratelimit/cool_down.py diff --git a/pythonwhois/caching/whois_server_cache.py b/pythonwhois/caching/whois_server_cache.py index 6193c1b..5bdadbc 100644 --- a/pythonwhois/caching/whois_server_cache.py +++ b/pythonwhois/caching/whois_server_cache.py @@ -44,7 +44,7 @@ def put_server(self, tld, whois_server): :param whois_server: The WHOIS server to store """ self.cache[tld] = whois_server - if self.persistent: + if self.file_path is not None: write_cache(self.cache, self.file_path) def set_persistent_location(self, file_path): @@ -54,7 +54,6 @@ def set_persistent_location(self, file_path): """ self.file_path = file_path self.cache = read_cache(file_path) - self.persistent = True server_cache = WhoisServerCache() diff --git a/pythonwhois/net.py b/pythonwhois/net.py index a7e515b..44a5139 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -6,8 +6,11 @@ from codecs import encode, decode from pythonwhois.caching.whois_server_cache import server_cache +from pythonwhois.ratelimit.cool_down import CoolDown from . 
import shared +cool_down_tracker = CoolDown() + def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=False, with_server_list=False, server_list=None): @@ -68,7 +71,13 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals request_domain = "=%s" % domain # Avoid partial matches else: request_domain = domain - response = whois_request(request_domain, target_server) + + if cool_down_tracker.can_use_server(target_server): + cool_down_tracker.use_server(target_server) + response = whois_request(request_domain, target_server) + else: + response = "" + if never_cut: # If the caller has requested to 'never cut' responses, he will get the original response from the server (this is # useful for callers that are only interested in the raw data). Otherwise, if the target is verisign-grs, we will diff --git a/pythonwhois/ratelimit/__init__.py b/pythonwhois/ratelimit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pythonwhois/ratelimit/cool_down.py b/pythonwhois/ratelimit/cool_down.py new file mode 100644 index 0000000..1a1f4c3 --- /dev/null +++ b/pythonwhois/ratelimit/cool_down.py @@ -0,0 +1,34 @@ +import thread +import threading +import time + +default_cool_down = 10 +cool_down_time = 1 + + +def decrement_thread(cool_down_object): + while True: + time.sleep(cool_down_time) + cool_down_object.decrement_cool_downs() + + +class CoolDown: + def __init__(self): + self.lock = threading.Lock() + self.servers_on_cool_down = {} + thread.start_new_thread(decrement_thread, (self,)) + + def can_use_server(self, whois_server): + with self.lock: + cooldown = self.servers_on_cool_down.get(whois_server) + return cooldown is None or cooldown <= 0 + + def use_server(self, whois_server): + with self.lock: + self.servers_on_cool_down[whois_server] = default_cool_down + print self.servers_on_cool_down + + def decrement_cool_downs(self): + with self.lock: + for server, cool_down in self.servers_on_cool_down.iteritems(): + 
self.servers_on_cool_down[server] = cool_down - cool_down_time diff --git a/setup.py b/setup.py index 19fa7ff..041e5a9 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup setup(name='whois-oracle', - version='1.0.2', + version='1.0.3', description='Module for retrieving and parsing the WHOIS data for a domain. Supports most domains. No dependencies.', author='Sander ten Hoor, original by Sven Slootweg', url='https://github.com/MasterFenrir/whois-oracle', From 0d1f71d5b41c341f002036dc81db84a663ac1656 Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Thu, 9 Jun 2016 17:01:30 +0200 Subject: [PATCH 11/40] ADD: Comments --- README.md | 6 +++--- pythonwhois/ratelimit/cool_down.py | 34 ++++++++++++++++++++++++++---- setup.py | 2 +- 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 7b1a6e1..d12f271 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ -whois-oracle, forked from pythonwhois -===================================== +The WHOIS Oracle, forked from pythonwhois +========================================= -Because it is all knowing! +Because it is all knowing! A WHOIS retrieval and parsing library for Python, forked from pythonwhois and updated by me. 
diff --git a/pythonwhois/ratelimit/cool_down.py b/pythonwhois/ratelimit/cool_down.py index 1a1f4c3..4d447c7 100644 --- a/pythonwhois/ratelimit/cool_down.py +++ b/pythonwhois/ratelimit/cool_down.py @@ -2,33 +2,59 @@ import threading import time -default_cool_down = 10 -cool_down_time = 1 +cool_down_start = 1.0 +cool_down_time = 0.5 def decrement_thread(cool_down_object): + """ + After sleeping for cool_down_time, decrement + all cool downs with cool_down_time + :param cool_down_object: + :return: + """ while True: time.sleep(cool_down_time) cool_down_object.decrement_cool_downs() class CoolDown: + """ + Handle the cooldown period for asking a WHOIS server again + """ + def __init__(self): + """ + Creates a dictionary for storing cool downs and starts + a thread for decrementing the cool down values + """ self.lock = threading.Lock() self.servers_on_cool_down = {} thread.start_new_thread(decrement_thread, (self,)) def can_use_server(self, whois_server): + """ + Check whether a server can be used again + :param whois_server: The WHOIS server to check + :return: True if the server can be used, False if not + """ with self.lock: cooldown = self.servers_on_cool_down.get(whois_server) return cooldown is None or cooldown <= 0 def use_server(self, whois_server): + """ + Tell the CoolDown instance that a WHOIS server is going to be used. 
+ The cool down will then be reset + :param whois_server: The WHOIS server that is going to be used + """ with self.lock: - self.servers_on_cool_down[whois_server] = default_cool_down - print self.servers_on_cool_down + self.servers_on_cool_down[whois_server] = cool_down_start def decrement_cool_downs(self): + """ + Decrement all the cool downs with cool_down_time + """ with self.lock: for server, cool_down in self.servers_on_cool_down.iteritems(): self.servers_on_cool_down[server] = cool_down - cool_down_time diff --git a/setup.py b/setup.py index 041e5a9..3a797ac 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ description='Module for retrieving and parsing the WHOIS data for a domain. Supports most domains. No dependencies.', author='Sander ten Hoor, original by Sven Slootweg', url='https://github.com/MasterFenrir/whois-oracle', - packages=['pythonwhois', 'pythonwhois.caching'], + packages=['pythonwhois', 'pythonwhois.caching', 'pythonwhois.caching'], package_data={"pythonwhois": ["*.dat"]}, install_requires=['argparse'], provides=['pythonwhois'], From 0bc8ed5363e647577a49c8708421e3c1bef1e4b7 Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Thu, 9 Jun 2016 17:14:54 +0200 Subject: [PATCH 12/40] FIX: Package being packaged in setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3a797ac..11b4f08 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ description='Module for retrieving and parsing the WHOIS data for a domain. Supports most domains. 
No dependencies.', author='Sander ten Hoor, original by Sven Slootweg', url='https://github.com/MasterFenrir/whois-oracle', - packages=['pythonwhois', 'pythonwhois.caching', 'pythonwhois.caching'], + packages=['pythonwhois', 'pythonwhois.caching', 'pythonwhois.ratelimit'], package_data={"pythonwhois": ["*.dat"]}, install_requires=['argparse'], provides=['pythonwhois'], From 08ea92351a632f09c2752776df4bce9453fae1f6 Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Fri, 10 Jun 2016 09:59:16 +0200 Subject: [PATCH 13/40] FIX: Package being packaged in setup.py --- pythonwhois/net.py | 1 + pythonwhois/ratelimit/cool_down.py | 80 +++++++++++++++++++++++++++--- 2 files changed, 73 insertions(+), 8 deletions(-) diff --git a/pythonwhois/net.py b/pythonwhois/net.py index 44a5139..9201acf 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -10,6 +10,7 @@ from . import shared cool_down_tracker = CoolDown() +cool_down_tracker.start() def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=False, with_server_list=False, diff --git a/pythonwhois/ratelimit/cool_down.py b/pythonwhois/ratelimit/cool_down.py index 4d447c7..3298c1e 100644 --- a/pythonwhois/ratelimit/cool_down.py +++ b/pythonwhois/ratelimit/cool_down.py @@ -2,8 +2,8 @@ import threading import time -cool_down_start = 1.0 -cool_down_time = 0.5 +default_cool_down_length = 1.0 +cool_down_period = 0.5 def decrement_thread(cool_down_object): @@ -14,7 +14,7 @@ def decrement_thread(cool_down_object): :return: """ while True: - time.sleep(cool_down_time) + time.sleep(cool_down_period) cool_down_object.decrement_cool_downs() @@ -25,11 +25,15 @@ class CoolDown: def __init__(self): """ - Creates a dictionary for storing cool downs and starts - a thread for decrementing the cool down values + Creates a dictionary for storing cool downs. """ self.lock = threading.Lock() self.servers_on_cool_down = {} + + def start(self): + """ + Start a thread decrementing all the cool down values. 
+ """ thread.start_new_thread(decrement_thread, (self,)) def can_use_server(self, whois_server): @@ -40,7 +44,7 @@ def can_use_server(self, whois_server): """ with self.lock: cooldown = self.servers_on_cool_down.get(whois_server) - return cooldown is None or cooldown <= 0 + return cooldown is None or cooldown.current_cool_down <= 0 def use_server(self, whois_server): """ @@ -49,7 +53,13 @@ def use_server(self, whois_server): :param whois_server: The WHOIS server that is going to be used """ with self.lock: - self.servers_on_cool_down[whois_server] = cool_down_start + if whois_server not in self.servers_on_cool_down: + self.servers_on_cool_down[whois_server] = CoolDownTracker(default_cool_down_length) + self.servers_on_cool_down[whois_server].use() + print "\n" + for key, value in self.servers_on_cool_down.iteritems(): + print str(key) + " Made requests: " + str(value.request_count) + " Cooldown: " + str( + value.current_cool_down) def decrement_cool_downs(self): """ @@ -57,4 +67,58 @@ def decrement_cool_downs(self): """ with self.lock: for server, cool_down in self.servers_on_cool_down.iteritems(): - self.servers_on_cool_down[server] = cool_down - cool_down_time + self.servers_on_cool_down[server].decrement_cooldown(cool_down_period) + + +class CoolDownTracker: + """ + Keep track of cool down settings for a specific WHOIS server + """ + + def __init__(self, cool_down_length, max_requests_minute=None, max_requests_hour=None, max_requests_day=None): + """ + Create a tracker. It can accept three maximums. When a maximum is reached, it will wait a set amount of time + before trying again, which is a minute, hour and day respectively. + :param cool_down_length: The default length of the cool down + :param max_requests_minute: The maximum number of requests per minute. 
+ :param max_requests_hour: The maximum number of requests per hour + :param max_requests_day: The maximum number of request per day + """ + self.cool_down_length = cool_down_length + self.max_requests_minute = max_requests_minute + self.max_requests_hour = max_requests_hour + self.max_requests_day = max_requests_day + + self.request_count = 0 + self.current_cool_down = 0 + + def use(self): + """ + Tell the tracker that the corresponding server is going to be used. + It will set the cool down, based on the amount of requests that already have been made + """ + self.request_count += 1 + if self.max_requests_reached(self.max_requests_minute): + self.current_cool_down = 60 + elif self.max_requests_reached(self.max_requests_hour): + self.current_cool_down = 3600 + elif self.max_requests_reached(self.max_requests_day): + self.current_cool_down = 86400 + else: + self.current_cool_down = self.cool_down_length + + def decrement_cooldown(self, decrement): + """ + Decrement the current cooldown with the given value, implying + that a given time has passed. + :param decrement: The value to decrement the current cool down value with + """ + self.current_cool_down -= decrement + + def max_requests_reached(self, limit): + """ + Check whether the maximum has been reached for a given limit. 
+ :param limit: The limit that should be checked for + :return: True if the limit has been reached, false if not + """ + return limit is not None and self.request_count % limit == 0 From 4e7048d2c500b09cfced77da48e444d9120cfcda Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Fri, 10 Jun 2016 10:55:30 +0200 Subject: [PATCH 14/40] ADD: Configuration for cool down REF: Setting the cache and the cool down config can be called on the pythonwhois package (__init__.py) --- pythonwhois/__init__.py | 18 ++++++++++++++- pythonwhois/ratelimit/cool_down.py | 36 ++++++++++++++++++++++++++++-- 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/pythonwhois/__init__.py b/pythonwhois/__init__.py index bf0c635..f8729e4 100644 --- a/pythonwhois/__init__.py +++ b/pythonwhois/__init__.py @@ -7,4 +7,20 @@ def get_whois(domain, normalized=[]): # other servers in the future, if it turns out that there are cases where the last WHOIS server in the chain doesn't # actually hold the handle contact details, but another WHOIS server in the chain does. return parse.parse_raw_whois(raw_data, normalized=normalized, never_query_handles=False, - handle_server=server_list[-1]) \ No newline at end of file + handle_server=server_list[-1]) + + +def set_persistent_cache(path_to_cache): + """ + Set a persistent cache. If the file does not yet exist, it is created. + :param path_to_cache: The place where the cache is stored or needs to be created + """ + net.server_cache.set_persistent_location(path_to_cache) + + +def set_cool_down_config(path_to_config): + """ + Set a cool down configuration file, describing specific settings for certain WHOIS servers. 
+ :param path_to_config: The path to the configuration file, this needs to exist + """ + net.cool_down_tracker.set_cool_down_config(path_to_config) diff --git a/pythonwhois/ratelimit/cool_down.py b/pythonwhois/ratelimit/cool_down.py index 3298c1e..0367737 100644 --- a/pythonwhois/ratelimit/cool_down.py +++ b/pythonwhois/ratelimit/cool_down.py @@ -1,3 +1,4 @@ +import ConfigParser import thread import threading import time @@ -58,8 +59,13 @@ def use_server(self, whois_server): self.servers_on_cool_down[whois_server].use() print "\n" for key, value in self.servers_on_cool_down.iteritems(): - print str(key) + " Made requests: " + str(value.request_count) + " Cooldown: " + str( - value.current_cool_down) + print key + print "\tMade requests: " + str(value.request_count) + print "\tCurrent cool down: " + str(value.current_cool_down) + print "\tCool down length: " + str(value.cool_down_length) + print "\tRequests per minute: " + str(value.max_requests_minute) + print "\tRequests per hour: " + str(value.max_requests_hour) + print "\tRequests per day: " + str(value.max_requests_day) def decrement_cool_downs(self): """ @@ -69,6 +75,32 @@ def decrement_cool_downs(self): for server, cool_down in self.servers_on_cool_down.iteritems(): self.servers_on_cool_down[server].decrement_cooldown(cool_down_period) + def set_cool_down_config(self, path_to_file): + """ + Tell the CoolDown instance of a configuration file, describing specific settings + for certain WHOIS servers. This configuration will + then be read and inserted into the cool down dictionary. 
+ :param path_to_file: The path to the configuration file + """ + config = ConfigParser.ConfigParser() + config.read(path_to_file) + for domain in config.sections(): + cool_down_length = self.get_from_config(config, domain, "cool_down_length", default_cool_down_length) + max_requests_minute = self.get_from_config(config, domain, "max_requests_minute") + max_requests_hour = self.get_from_config(config, domain, "max_requests_hour") + max_requests_day = self.get_from_config(config, domain, "max_requests_day") + with self.lock: + self.servers_on_cool_down[domain] = CoolDownTracker(cool_down_length, + max_requests_minute, + max_requests_hour, + max_requests_day) + + def get_from_config(self, config, section, key, default=None): + if config.has_option(section, key): + return config.getfloat(section, key) + else: + return default + class CoolDownTracker: """ From 57466bce23a3e3df9b80290cc53805304975fab2 Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Fri, 10 Jun 2016 10:58:20 +0200 Subject: [PATCH 15/40] REF: prints REF: Comments --- pythonwhois/ratelimit/cool_down.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/pythonwhois/ratelimit/cool_down.py b/pythonwhois/ratelimit/cool_down.py index 0367737..6c6934c 100644 --- a/pythonwhois/ratelimit/cool_down.py +++ b/pythonwhois/ratelimit/cool_down.py @@ -57,15 +57,6 @@ def use_server(self, whois_server): if whois_server not in self.servers_on_cool_down: self.servers_on_cool_down[whois_server] = CoolDownTracker(default_cool_down_length) self.servers_on_cool_down[whois_server].use() - print "\n" - for key, value in self.servers_on_cool_down.iteritems(): - print key - print "\tMade requests: " + str(value.request_count) - print "\tCurrent cool down: " + str(value.current_cool_down) - print "\tCool down length: " + str(value.cool_down_length) - print "\tRequests per minute: " + str(value.max_requests_minute) - print "\tRequests per hour: " + str(value.max_requests_hour) - 
print "\tRequests per day: " + str(value.max_requests_day) def decrement_cool_downs(self): """ @@ -96,6 +87,14 @@ def set_cool_down_config(self, path_to_file): max_requests_day) def get_from_config(self, config, section, key, default=None): + """ + Get a value from the config if it exists, otherwise return the default value + :param config: The configuration to get the value from + :param section: The section to get the value from + :param key: The key that may or may not exist + :param default: The default value to return, which is None by default + :return: The value if it exists, else default + """ if config.has_option(section, key): return config.getfloat(section, key) else: From 7d3a8aada271013bf44daa5f0ec2a0c40dab8e7e Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Fri, 10 Jun 2016 11:54:28 +0200 Subject: [PATCH 16/40] REF: Thread is started automatically again --- pythonwhois/net.py | 1 - pythonwhois/ratelimit/cool_down.py | 5 ----- 2 files changed, 6 deletions(-) diff --git a/pythonwhois/net.py b/pythonwhois/net.py index 9201acf..44a5139 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -10,7 +10,6 @@ from . import shared cool_down_tracker = CoolDown() -cool_down_tracker.start() def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=False, with_server_list=False, diff --git a/pythonwhois/ratelimit/cool_down.py b/pythonwhois/ratelimit/cool_down.py index 6c6934c..80170a8 100644 --- a/pythonwhois/ratelimit/cool_down.py +++ b/pythonwhois/ratelimit/cool_down.py @@ -30,11 +30,6 @@ def __init__(self): """ self.lock = threading.Lock() self.servers_on_cool_down = {} - - def start(self): - """ - Start a thread decrementing all the cool down values. 
- """ thread.start_new_thread(decrement_thread, (self,)) def can_use_server(self, whois_server): From 97c2045be4c165d0c00be4fe7a91c21a7ca9d35c Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Fri, 10 Jun 2016 12:15:57 +0200 Subject: [PATCH 17/40] ADD: The values for the default cool down and the cool down period can now be configured --- pythonwhois/ratelimit/cool_down.py | 59 +++++++++++++++++++----------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/pythonwhois/ratelimit/cool_down.py b/pythonwhois/ratelimit/cool_down.py index 80170a8..91f3b13 100644 --- a/pythonwhois/ratelimit/cool_down.py +++ b/pythonwhois/ratelimit/cool_down.py @@ -3,22 +3,33 @@ import threading import time -default_cool_down_length = 1.0 -cool_down_period = 0.5 - def decrement_thread(cool_down_object): """ After sleeping for cool_down_time, decrement all cool downs with cool_down_time :param cool_down_object: - :return: """ while True: - time.sleep(cool_down_period) + time.sleep(cool_down_object.cool_down_period) cool_down_object.decrement_cool_downs() +def get_from_config(config, section, key, default=None): + """ + Get a value from the config if it exists, otherwise return the default value + :param config: The configuration to get the value from + :param section: The section to get the value from + :param key: The key that may or may not exist + :param default: The default value to return, which is None by default + :return: The value if it exists, else default + """ + if config.has_option(section, key): + return config.getfloat(section, key) + else: + return default + + class CoolDown: """ Handle the cooldown period for asking a WHOIS server again @@ -30,6 +41,8 @@ def __init__(self): """ self.lock = threading.Lock() self.servers_on_cool_down = {} + self.default_cool_down_length = 1.0 + self.cool_down_period = 0.5 thread.start_new_thread(decrement_thread, (self,)) def can_use_server(self, whois_server): @@ -50,7 +63,7 @@ def use_server(self, 
whois_server): """ with self.lock: if whois_server not in self.servers_on_cool_down: - self.servers_on_cool_down[whois_server] = CoolDownTracker(default_cool_down_length) + self.servers_on_cool_down[whois_server] = CoolDownTracker(self.default_cool_down_length) self.servers_on_cool_down[whois_server].use() def decrement_cool_downs(self): @@ -59,7 +72,7 @@ def decrement_cool_downs(self): """ with self.lock: for server, cool_down in self.servers_on_cool_down.iteritems(): - self.servers_on_cool_down[server].decrement_cooldown(cool_down_period) + self.servers_on_cool_down[server].decrement_cooldown(self.cool_down_period) def set_cool_down_config(self, path_to_file): """ @@ -70,30 +83,32 @@ def set_cool_down_config(self, path_to_file): """ config = ConfigParser.ConfigParser() config.read(path_to_file) + config = self.get_and_remove_defaults_from_config(config) for domain in config.sections(): - cool_down_length = self.get_from_config(config, domain, "cool_down_length", default_cool_down_length) - max_requests_minute = self.get_from_config(config, domain, "max_requests_minute") - max_requests_hour = self.get_from_config(config, domain, "max_requests_hour") - max_requests_day = self.get_from_config(config, domain, "max_requests_day") + cool_down_length = get_from_config(config, domain, "cool_down_length", self.default_cool_down_length) + max_requests_minute = get_from_config(config, domain, "max_requests_minute") + max_requests_hour = get_from_config(config, domain, "max_requests_hour") + max_requests_day = get_from_config(config, domain, "max_requests_day") with self.lock: self.servers_on_cool_down[domain] = CoolDownTracker(cool_down_length, max_requests_minute, max_requests_hour, max_requests_day) - def get_from_config(self, config, section, key, default=None): + def get_and_remove_defaults_from_config(self, config): """ - Get a value from the config if it exists, otherwise return the default value - :param config: The configuration to get the value from - :param 
section: The section to get the value from - :param key: The key that may or may not exist - :param default: The default value to return, which is None by default - :return: The value if it exists, else default + Gets the general settings from the config. Then removes them + and returns the modified config. + :param config: The config to obtain the default values from + :return: The modified config, without the 'general' section """ - if config.has_option(section, key): - return config.getfloat(section, key) - else: - return default + if config.has_section("general"): + self.default_cool_down_length = get_from_config(config, "general", "default_cool_down_length", + self.default_cool_down_length) + self.cool_down_period = get_from_config(config, "general", "cool_down_period", self.cool_down_period) + config.remove_section("general") + + return config class CoolDownTracker: From 2aa665422b4e6eb15e9b5811b071a8c9e8812b36 Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Sun, 12 Jun 2016 17:28:45 +0200 Subject: [PATCH 18/40] REF: Some comments and variable names --- pythonwhois/ratelimit/cool_down.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pythonwhois/ratelimit/cool_down.py b/pythonwhois/ratelimit/cool_down.py index 91f3b13..1ac0b59 100644 --- a/pythonwhois/ratelimit/cool_down.py +++ b/pythonwhois/ratelimit/cool_down.py @@ -32,12 +32,14 @@ def get_from_config(config, section, key, default=None): class CoolDown: """ - Handle the cooldown period for asking a WHOIS server again + Handle the cool down period for asking a WHOIS server again """ def __init__(self): """ - Creates a dictionary for storing cool downs. + Creates a dictionary for storing cool downs and starts + a new thread to decrement them every time after a set period + of time has passed, which is 0.5 seconds by default. 
""" self.lock = threading.Lock() self.servers_on_cool_down = {} @@ -72,7 +74,7 @@ def decrement_cool_downs(self): """ with self.lock: for server, cool_down in self.servers_on_cool_down.iteritems(): - self.servers_on_cool_down[server].decrement_cooldown(self.cool_down_period) + self.servers_on_cool_down[server].decrement_cool_down(self.cool_down_period) def set_cool_down_config(self, path_to_file): """ @@ -148,7 +150,7 @@ def use(self): else: self.current_cool_down = self.cool_down_length - def decrement_cooldown(self, decrement): + def decrement_cool_down(self, decrement): """ Decrement the current cooldown with the given value, implying that a given time has passed. From 393f171d283fc5766ad9bde65ca3cb1f6b895d4c Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Sun, 12 Jun 2016 18:07:09 +0200 Subject: [PATCH 19/40] ADD: Explanation to the README.md about caching and cool down --- README.md | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index d12f271..8fbae30 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,34 @@ None! All you need is the Python standard library. The manual (including install instructions) can be found in the doc/ directory. A HTML version is also viewable [here](http://cryto.net/pythonwhois). +## Cache configuration +Using pythonwhois.set_persistent_cache a cache can be set. If a cache is set, +whois-oracle will look there for WHOIS servers for TLD's. For domains with thin +WHOIS servers, only the 'head' WHOIS server is cached, not the referral servers. +Otherwise it would +be impossible to get the correct information because the information for the domain +might not be on that WHOIS server at all. + +## Cool down configuration +This feature is not useful for single lookups, but for bulk this comes in really handy. +Every WHOIS server gets a certain time before it will be asked again, to prevent spamming +and possibly refused connections. 
This can be configured by passing a configuration file +to pythonwhois.set_cool_down_config. This file can contain the following to elements, but doesn't have to. +`[general]` +`cool_down_period : 0.5` +`default_cool_down_length : 1` +This is the general part. Only one of them should exist. whois-oracle checks +for both these properties, but they are not both necessary. + +`[whois.eu]` +`cool_down_length : 10` +`max_requests_minute : 5` +`max_requests_hour : 20` +`max_requests_day : 50` +This is how sections for specific WHOIS servers are defined. The section +name is the name of the server and the section can contain the listed properties. +None of them are required. Multiple WHOIS servers can be added to the configuration file. + ## Goals * 100% coverage of WHOIS formats. @@ -55,14 +83,6 @@ The manual (including install instructions) can be found in the doc/ directory. Do note that `ipwhois` does not offer a normalization feature, and does not (yet) come with a command-line tool. Additionally, `ipwhois` is maintained by Philip Hane and not by me; please make sure to file bugs relating to it in the `ipwhois` repository, not in that of `pythonwhois`. -## Important update notes - -*2.4.0 and up*: A lot of changes were made to the normalization, and the performance under Python 2.x was significantly improved. The average parsing time under Python 2.7 has dropped by 94% (!), and on my system averages out at 18ms. Performance under Python 3.x is [unchanged](https://github.com/joepie91/python-whois/issues/27). `pythonwhois` will now expand a lot of abbreviations in normalized mode, such as airport codes, ISO country codes, and US/CA/AU state abbreviations. The consequence of this is that the library is now bigger (as it ships a list of these abbreviations). Also note that there *may* be licensing consequences, in particular regarding the airport code database. More information about that can be found below. - -*2.3.0 and up*: Python 3 support was fixed. 
Creation date parsing for contacts was fixed; correct timestamps will now be returned, rather than unformatted ones - if your application relies on the broken variant, you'll need to change your code. Some additional parameters were added to the `net` and `parse` methods to facilitate NIC handle lookups; the defaults are backwards-compatible, and these changes should not have any consequences for your code. Thai WHOIS parsing was implemented, but is a little spotty - data may occasionally be incorrectly split up. Please submit a bug report if you run across any issues. - -*2.2.0 and up*: The internal workings of `get_whois_raw` have been changed, to better facilitate parsing of WHOIS data from registries that may return multiple partial matches for a query, such as `whois.verisign-grs.com`. This change means that, by default, `get_whois_raw` will now strip out the part of such a response that does not pertain directly to the requested domain. If your application requires an unmodified raw WHOIS response and is calling `get_whois_raw` directly, you should use the new `never_cut` parameter to keep pythonwhois from doing this post-processing. As this is a potentially breaking behaviour change, the minor version has been bumped. - ## It doesn't work! * It doesn't work at all? From 85f8bd2e0a3ef3a8dc00c15930c12fb1f5110db1 Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Mon, 13 Jun 2016 08:03:54 +0200 Subject: [PATCH 20/40] FIX: Clarification in the Readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8fbae30..151349d 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ to pythonwhois.set_cool_down_config. This file can contain the following to elem `cool_down_period : 0.5` `default_cool_down_length : 1` This is the general part. Only one of them should exist. whois-oracle checks -for both these properties, but they are not both necessary. 
+for both these properties, but they are both not necessary. `[whois.eu]` `cool_down_length : 10` From e2d8cddf64128f6338cab3daef9889afd669b971 Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Mon, 13 Jun 2016 10:12:59 +0200 Subject: [PATCH 21/40] REF: Placed CoolDownTracker into its own file ENH: More comments REF: Extracted the building of custom CoolDownTrackers according to a config file into its own method ADD: If a WHOIS server is not found, an empty response is given. --- pythonwhois/net.py | 9 +- pythonwhois/ratelimit/cool_down.py | 105 +++++++-------------- pythonwhois/ratelimit/cool_down_tracker.py | 52 ++++++++++ 3 files changed, 91 insertions(+), 75 deletions(-) create mode 100644 pythonwhois/ratelimit/cool_down_tracker.py diff --git a/pythonwhois/net.py b/pythonwhois/net.py index 44a5139..3e3ba57 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -59,8 +59,11 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals if cached_server is not None: target_server = cached_server else: - target_server = get_root_server(domain) - server_cache.put_server(tld, target_server) + try: + target_server = get_root_server(domain) + server_cache.put_server(tld, target_server) + except Exception: + target_server = "" else: target_server = server if target_server == "whois.jprs.jp": @@ -72,7 +75,7 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals else: request_domain = domain - if cool_down_tracker.can_use_server(target_server): + if target_server and cool_down_tracker.can_use_server(target_server): cool_down_tracker.use_server(target_server) response = whois_request(request_domain, target_server) else: diff --git a/pythonwhois/ratelimit/cool_down.py b/pythonwhois/ratelimit/cool_down.py index 1ac0b59..e2499d9 100644 --- a/pythonwhois/ratelimit/cool_down.py +++ b/pythonwhois/ratelimit/cool_down.py @@ -3,19 +3,21 @@ import threading import time +from 
pythonwhois.ratelimit.cool_down_tracker import CoolDownTracker + def decrement_thread(cool_down_object): """ After sleeping for cool_down_time, decrement all cool downs with cool_down_time - :param cool_down_object: + :param cool_down_object: An instance of CoolDown """ while True: time.sleep(cool_down_object.cool_down_period) cool_down_object.decrement_cool_downs() -def get_from_config(config, section, key, default=None): +def get_float_from_config(config, section, key, default=None): """ Get a value from the config if it exists, otherwise return the default value :param config: The configuration to get the value from @@ -79,25 +81,38 @@ def decrement_cool_downs(self): def set_cool_down_config(self, path_to_file): """ Tell the CoolDown instance of a configuration file, describing specific settings - for certain WHOIS servers. This configuration will - then be read and inserted into the cool down dictionary. + for certain WHOIS servers. This configuration will then be read and inserted into + the cool down dictionary. + If the configuration contains a general section, this will be consumed and removed from the config instance + (not the file). This is done to keep all the configuration in one file, but to be able to easily loop + over all the WHOIS server sections. 
:param path_to_file: The path to the configuration file """ config = ConfigParser.ConfigParser() config.read(path_to_file) - config = self.get_and_remove_defaults_from_config(config) - for domain in config.sections(): - cool_down_length = get_from_config(config, domain, "cool_down_length", self.default_cool_down_length) - max_requests_minute = get_from_config(config, domain, "max_requests_minute") - max_requests_hour = get_from_config(config, domain, "max_requests_hour") - max_requests_day = get_from_config(config, domain, "max_requests_day") + config = self.consume_defaults_from_config(config) + self.apply_cool_down_config(config) + + def apply_cool_down_config(self, config): + """ + Read all the WHOIS server sections from the configuration and build + CoolDownTracker objects for them containing the read information. + These CoolDownTracker instances are then placed in servers_on_cool_down. + :param config: A configuration file with only WHOIS server sections + """ + for whois_server in config.sections(): + cool_down_length = get_float_from_config(config, whois_server, "cool_down_length", + self.default_cool_down_length) + max_requests_minute = get_float_from_config(config, whois_server, "max_requests_minute") + max_requests_hour = get_float_from_config(config, whois_server, "max_requests_hour") + max_requests_day = get_float_from_config(config, whois_server, "max_requests_day") with self.lock: - self.servers_on_cool_down[domain] = CoolDownTracker(cool_down_length, - max_requests_minute, - max_requests_hour, - max_requests_day) + self.servers_on_cool_down[whois_server] = CoolDownTracker(cool_down_length, + max_requests_minute, + max_requests_hour, + max_requests_day) - def get_and_remove_defaults_from_config(self, config): + def consume_defaults_from_config(self, config): """ Gets the general settings from the config. Then removes them and returns the modified config. 
@@ -105,63 +120,9 @@ def get_and_remove_defaults_from_config(self, config): :return: The modified config, without the 'general' section """ if config.has_section("general"): - self.default_cool_down_length = get_from_config(config, "general", "default_cool_down_length", - self.default_cool_down_length) - self.cool_down_period = get_from_config(config, "general", "cool_down_period", self.cool_down_period) + self.default_cool_down_length = get_float_from_config(config, "general", "default_cool_down_length", + self.default_cool_down_length) + self.cool_down_period = get_float_from_config(config, "general", "cool_down_period", self.cool_down_period) config.remove_section("general") return config - - -class CoolDownTracker: - """ - Keep track of cool down settings for a specific WHOIS server - """ - - def __init__(self, cool_down_length, max_requests_minute=None, max_requests_hour=None, max_requests_day=None): - """ - Create a tracker. It can accept three maximums. When a maximum is reached, it will wait a set amount of time - before trying again, which is a minute, hour and day respectively. - :param cool_down_length: The default length of the cool down - :param max_requests_minute: The maximum number of requests per minute. - :param max_requests_hour: The maximum number of requests per hour - :param max_requests_day: The maximum number of request per day - """ - self.cool_down_length = cool_down_length - self.max_requests_minute = max_requests_minute - self.max_requests_hour = max_requests_hour - self.max_requests_day = max_requests_day - - self.request_count = 0 - self.current_cool_down = 0 - - def use(self): - """ - Tell the tracker that the corresponding server is going to be used. 
- It will set the cool down, based on the amount of requests that already have been made - """ - self.request_count += 1 - if self.max_requests_reached(self.max_requests_minute): - self.current_cool_down = 60 - elif self.max_requests_reached(self.max_requests_hour): - self.current_cool_down = 3600 - elif self.max_requests_reached(self.max_requests_day): - self.current_cool_down = 86400 - else: - self.current_cool_down = self.cool_down_length - - def decrement_cool_down(self, decrement): - """ - Decrement the current cooldown with the given value, implying - that a given time has passed. - :param decrement: The value to decrement the current cool down value with - """ - self.current_cool_down -= decrement - - def max_requests_reached(self, limit): - """ - Check whether the maximum has been reached for a given limit. - :param limit: The limit that should be checked for - :return: True if the limit has been reached, false if not - """ - return limit is not None and self.request_count % limit == 0 diff --git a/pythonwhois/ratelimit/cool_down_tracker.py b/pythonwhois/ratelimit/cool_down_tracker.py new file mode 100644 index 0000000..41c491c --- /dev/null +++ b/pythonwhois/ratelimit/cool_down_tracker.py @@ -0,0 +1,52 @@ +class CoolDownTracker: + """ + Keep track of cool down settings for a specific WHOIS server + """ + + def __init__(self, cool_down_length, max_requests_minute=None, max_requests_hour=None, max_requests_day=None): + """ + Create a tracker. It can accept three maximums. When a maximum is reached, it will wait a set amount of time + before trying again, which is a minute, hour and day respectively. + :param cool_down_length: The default length of the cool down + :param max_requests_minute: The maximum number of requests per minute. 
+ :param max_requests_hour: The maximum number of requests per hour + :param max_requests_day: The maximum number of request per day + """ + self.cool_down_length = cool_down_length + self.max_requests_minute = max_requests_minute + self.max_requests_hour = max_requests_hour + self.max_requests_day = max_requests_day + + self.request_count = 0 + self.current_cool_down = 0 + + def use(self): + """ + Tell the tracker that the corresponding server is going to be used. + It will set the cool down, based on the amount of requests that already have been made + """ + self.request_count += 1 + if self.max_requests_reached(self.max_requests_minute): + self.current_cool_down = 60 + elif self.max_requests_reached(self.max_requests_hour): + self.current_cool_down = 3600 + elif self.max_requests_reached(self.max_requests_day): + self.current_cool_down = 86400 + else: + self.current_cool_down = self.cool_down_length + + def decrement_cool_down(self, decrement): + """ + Decrement the current cooldown with the given value, implying + that a given time has passed. + :param decrement: The value to decrement the current cool down value with + """ + self.current_cool_down -= decrement + + def max_requests_reached(self, limit): + """ + Check whether the maximum has been reached for a given limit. 
+ :param limit: The limit that should be checked for + :return: True if the limit has been reached, false if not + """ + return limit is not None and self.request_count % limit == 0 From faa0fe049c6369989c80db708d89dd5da27ba318 Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Mon, 13 Jun 2016 10:56:12 +0200 Subject: [PATCH 22/40] REF: Made the name for the method that resets the cool down clearer --- pythonwhois/ratelimit/cool_down.py | 2 +- pythonwhois/ratelimit/cool_down_tracker.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pythonwhois/ratelimit/cool_down.py b/pythonwhois/ratelimit/cool_down.py index e2499d9..4102ab7 100644 --- a/pythonwhois/ratelimit/cool_down.py +++ b/pythonwhois/ratelimit/cool_down.py @@ -68,7 +68,7 @@ def use_server(self, whois_server): with self.lock: if whois_server not in self.servers_on_cool_down: self.servers_on_cool_down[whois_server] = CoolDownTracker(self.default_cool_down_length) - self.servers_on_cool_down[whois_server].use() + self.servers_on_cool_down[whois_server].use_and_reset_cool_down() def decrement_cool_downs(self): """ diff --git a/pythonwhois/ratelimit/cool_down_tracker.py b/pythonwhois/ratelimit/cool_down_tracker.py index 41c491c..b2dedc2 100644 --- a/pythonwhois/ratelimit/cool_down_tracker.py +++ b/pythonwhois/ratelimit/cool_down_tracker.py @@ -20,7 +20,7 @@ def __init__(self, cool_down_length, max_requests_minute=None, max_requests_hour self.request_count = 0 self.current_cool_down = 0 - def use(self): + def use_and_reset_cool_down(self): """ Tell the tracker that the corresponding server is going to be used. It will set the cool down, based on the amount of requests that already have been made From 9607741a34a7aa5b5b4678c02b70b4eda94dd288 Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Mon, 13 Jun 2016 11:36:47 +0200 Subject: [PATCH 23/40] FIX: Order in which the cool down is decided. 
ADD: Test for CoolDownTracker --- pythonwhois/ratelimit/cool_down_tracker.py | 8 +-- test.sh | 2 + test_pythonwhois/__init__.py | 0 test_pythonwhois/ratelimit/__init__.py | 0 .../ratelimit/test_cool_down_tracker.py | 51 +++++++++++++++++++ 5 files changed, 57 insertions(+), 4 deletions(-) create mode 100755 test.sh create mode 100644 test_pythonwhois/__init__.py create mode 100644 test_pythonwhois/ratelimit/__init__.py create mode 100644 test_pythonwhois/ratelimit/test_cool_down_tracker.py diff --git a/pythonwhois/ratelimit/cool_down_tracker.py b/pythonwhois/ratelimit/cool_down_tracker.py index b2dedc2..9870adb 100644 --- a/pythonwhois/ratelimit/cool_down_tracker.py +++ b/pythonwhois/ratelimit/cool_down_tracker.py @@ -26,12 +26,12 @@ def use_and_reset_cool_down(self): It will set the cool down, based on the amount of requests that already have been made """ self.request_count += 1 - if self.max_requests_reached(self.max_requests_minute): - self.current_cool_down = 60 + if self.max_requests_reached(self.max_requests_day): + self.current_cool_down = 86400 elif self.max_requests_reached(self.max_requests_hour): self.current_cool_down = 3600 - elif self.max_requests_reached(self.max_requests_day): - self.current_cool_down = 86400 + elif self.max_requests_reached(self.max_requests_minute): + self.current_cool_down = 60 else: self.current_cool_down = self.cool_down_length diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..44d1451 --- /dev/null +++ b/test.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +python -m unittest discover test_pythonwhois \ No newline at end of file diff --git a/test_pythonwhois/__init__.py b/test_pythonwhois/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test_pythonwhois/ratelimit/__init__.py b/test_pythonwhois/ratelimit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test_pythonwhois/ratelimit/test_cool_down_tracker.py b/test_pythonwhois/ratelimit/test_cool_down_tracker.py new file mode 100644 
index 0000000..19872bc --- /dev/null +++ b/test_pythonwhois/ratelimit/test_cool_down_tracker.py @@ -0,0 +1,51 @@ +import unittest + +from pythonwhois.ratelimit.cool_down_tracker import CoolDownTracker + + +class CoolDownTrackerTest(unittest.TestCase): + """ + Test the stats of the CoolDownTracker. The tests are focused on choosing the correct cool down. + """ + + def test_decrement(self): + tracker_1 = CoolDownTracker(2) + tracker_1.use_and_reset_cool_down() + self.assertEqual(tracker_1.current_cool_down, 2) + + tracker_2 = CoolDownTracker(5) + tracker_2.use_and_reset_cool_down() + self.assertEqual(tracker_2.current_cool_down, 5) + + tracker_1.decrement_cool_down(1) + tracker_2.decrement_cool_down(2.5) + self.assertEqual(tracker_1.current_cool_down, 1) + self.assertEqual(tracker_2.current_cool_down, 2.5) + + def test_minute_limit_reached(self): + tracker = CoolDownTracker(1, max_requests_minute=5) + for _ in range(5): + tracker.use_and_reset_cool_down() + + self.assertEqual(tracker.current_cool_down, 60) + + def test_hour_limit_reached(self): + tracker = CoolDownTracker(1, max_requests_hour=10) + for _ in range(10): + tracker.use_and_reset_cool_down() + + self.assertEqual(tracker.current_cool_down, 3600) + + def test_day_limit_reached(self): + tracker = CoolDownTracker(1, max_requests_day=20) + for _ in range(20): + tracker.use_and_reset_cool_down() + + self.assertEqual(tracker.current_cool_down, 86400) + + def test_should_use_day_limit(self): + tracker = CoolDownTracker(1, max_requests_minute=5, max_requests_hour=13, max_requests_day=20) + for _ in range(20): + tracker.use_and_reset_cool_down() + + self.assertEqual(tracker.current_cool_down, 86400) From 271a5f2707a37aab0298902c4868f1359f114559 Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Mon, 13 Jun 2016 14:59:22 +0200 Subject: [PATCH 24/40] REF: Removed threading, thanks to Wes. Thanks Wes! 
--- README.md | 4 +- pythonwhois/net.py | 3 +- pythonwhois/ratelimit/cool_down.py | 76 ++++++++++++++---------------- 3 files changed, 38 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index 151349d..e199a18 100644 --- a/README.md +++ b/README.md @@ -27,10 +27,8 @@ Every WHOIS server gets a certain time before it will be asked again, to prevent and possibly refused connections. This can be configured by passing a configuration file to pythonwhois.set_cool_down_config. This file can contain the following to elements, but doesn't have to. `[general]` -`cool_down_period : 0.5` `default_cool_down_length : 1` -This is the general part. Only one of them should exist. whois-oracle checks -for both these properties, but they are both not necessary. +This is the general part. Currently, only one variable can be defined. It is optional to do so. `[whois.eu]` `cool_down_length : 10` diff --git a/pythonwhois/net.py b/pythonwhois/net.py index 3e3ba57..c9b964d 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -75,8 +75,7 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals else: request_domain = domain - if target_server and cool_down_tracker.can_use_server(target_server): - cool_down_tracker.use_server(target_server) + if target_server and cool_down_tracker.try_to_use_server(target_server): response = whois_request(request_domain, target_server) else: response = "" diff --git a/pythonwhois/ratelimit/cool_down.py b/pythonwhois/ratelimit/cool_down.py index 4102ab7..5556777 100644 --- a/pythonwhois/ratelimit/cool_down.py +++ b/pythonwhois/ratelimit/cool_down.py @@ -1,22 +1,9 @@ import ConfigParser -import thread -import threading -import time +import datetime from pythonwhois.ratelimit.cool_down_tracker import CoolDownTracker -def decrement_thread(cool_down_object): - """ - After sleeping for cool_down_time, decrement - all cool downs with cool_down_time - :param cool_down_object: An instance of CoolDown - """ - while 
True: - time.sleep(cool_down_object.cool_down_period) - cool_down_object.decrement_cool_downs() - - def get_float_from_config(config, section, key, default=None): """ Get a value from the config if it exists, otherwise return the default value @@ -39,15 +26,11 @@ class CoolDown: def __init__(self): """ - Creates a dictionary for storing cool downs and starts - a new thread to decrement them every time after a set period - of time has passed, which is 0.5 seconds by default. + Creates a dictionary for storing cool downs. """ - self.lock = threading.Lock() self.servers_on_cool_down = {} self.default_cool_down_length = 1.0 - self.cool_down_period = 0.5 - thread.start_new_thread(decrement_thread, (self,)) + self.last_request_time = datetime.datetime.now() def can_use_server(self, whois_server): """ @@ -55,28 +38,44 @@ def can_use_server(self, whois_server): :param whois_server: The WHOIS server to check :return: True if the server can be used, False if not """ - with self.lock: - cooldown = self.servers_on_cool_down.get(whois_server) - return cooldown is None or cooldown.current_cool_down <= 0 + cool_down = self.servers_on_cool_down.get(whois_server) + return cool_down is None or cool_down.current_cool_down <= 0 - def use_server(self, whois_server): + def try_to_use_server(self, whois_server): """ - Tell the CoolDown instance that a WHOIS server is going to be used. - The cool down will then be reset + Try to use a WHOIS server. On True, it was a success and the cool down has been reset. 
+ On False, the server was not available yet :param whois_server: The WHOIS server that is going to be used + :return True if the server was successfully marked as used and the cool down has been reset, + False if the server was not yet available """ - with self.lock: - if whois_server not in self.servers_on_cool_down: - self.servers_on_cool_down[whois_server] = CoolDownTracker(self.default_cool_down_length) - self.servers_on_cool_down[whois_server].use_and_reset_cool_down() + self.decrement_cool_downs() + if not self.can_use_server(whois_server): + return False + + if whois_server not in self.servers_on_cool_down: + self.servers_on_cool_down[whois_server] = CoolDownTracker(self.default_cool_down_length) + self.servers_on_cool_down[whois_server].use_and_reset_cool_down() + return True def decrement_cool_downs(self): """ Decrement all the cool downs with cool_down_time """ - with self.lock: - for server, cool_down in self.servers_on_cool_down.iteritems(): - self.servers_on_cool_down[server].decrement_cool_down(self.cool_down_period) + time_diff = self.get_time_difference() + for server, cool_down in self.servers_on_cool_down.iteritems(): + self.servers_on_cool_down[server].decrement_cool_down(time_diff) + + def get_time_difference(self): + """ + Get the difference in time between te last time this was called + and now. 
+ :return: The difference in seconds + """ + now = datetime.datetime.now() + diff = now - self.last_request_time + self.last_request_time = now + return diff.total_seconds() def set_cool_down_config(self, path_to_file): """ @@ -106,11 +105,10 @@ def apply_cool_down_config(self, config): max_requests_minute = get_float_from_config(config, whois_server, "max_requests_minute") max_requests_hour = get_float_from_config(config, whois_server, "max_requests_hour") max_requests_day = get_float_from_config(config, whois_server, "max_requests_day") - with self.lock: - self.servers_on_cool_down[whois_server] = CoolDownTracker(cool_down_length, - max_requests_minute, - max_requests_hour, - max_requests_day) + self.servers_on_cool_down[whois_server] = CoolDownTracker(cool_down_length, + max_requests_minute, + max_requests_hour, + max_requests_day) def consume_defaults_from_config(self, config): """ @@ -122,7 +120,5 @@ def consume_defaults_from_config(self, config): if config.has_section("general"): self.default_cool_down_length = get_float_from_config(config, "general", "default_cool_down_length", self.default_cool_down_length) - self.cool_down_period = get_float_from_config(config, "general", "cool_down_period", self.cool_down_period) - config.remove_section("general") return config From feb506596e64d84676e82f6933f9ffe69c16e41c Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Tue, 14 Jun 2016 10:00:08 +0200 Subject: [PATCH 25/40] REF: net.py looks a bit neater now, but I didn't go all out because then it never ends (it's mostly not my code) REF: Clarified some names ADD: A separate class for reading the config file --- pythonwhois/net.py | 155 +++++++++++------- pythonwhois/ratelimit/cool_down.py | 60 +------ pythonwhois/ratelimit/cool_down_config.py | 79 +++++++++ pythonwhois/ratelimit/cool_down_tracker.py | 8 +- .../ratelimit/test_cool_down_tracker.py | 12 +- 5 files changed, 191 insertions(+), 123 deletions(-) create mode 100644 
pythonwhois/ratelimit/cool_down_config.py diff --git a/pythonwhois/net.py b/pythonwhois/net.py index c9b964d..5c7f2c2 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -7,37 +7,37 @@ from pythonwhois.caching.whois_server_cache import server_cache from pythonwhois.ratelimit.cool_down import CoolDown -from . import shared cool_down_tracker = CoolDown() +# Sometimes IANA simply won't give us the right root WHOIS server +exceptions = { + ".ac.uk": "whois.ja.net", + ".ps": "whois.pnina.ps", + ".buzz": "whois.nic.buzz", + ".moe": "whois.nic.moe", + ".arpa": "whois.iana.org", + ".bid": "whois.nic.bid", + ".int": "whois.iana.org", + ".kred": "whois.nic.kred", + ".nagoya": "whois.gmoregistry.net", + ".nyc": "whois.nic.nyc", + ".okinawa": "whois.gmoregistry.net", + ".qpon": "whois.nic.qpon", + ".sohu": "whois.gtld.knet.cn", + ".tokyo": "whois.nic.tokyo", + ".trade": "whois.nic.trade", + ".webcam": "whois.nic.webcam", + ".xn--rhqv96g": "whois.nic.xn--rhqv96g", + # The following is a bit hacky, but IANA won't return the right answer for example.com because it's a direct registration. 
+ "example.com": "whois.verisign-grs.com" +} + def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=False, with_server_list=False, server_list=None): previous = previous or [] server_list = server_list or [] - # Sometimes IANA simply won't give us the right root WHOIS server - exceptions = { - ".ac.uk": "whois.ja.net", - ".ps": "whois.pnina.ps", - ".buzz": "whois.nic.buzz", - ".moe": "whois.nic.moe", - ".arpa": "whois.iana.org", - ".bid": "whois.nic.bid", - ".int": "whois.iana.org", - ".kred": "whois.nic.kred", - ".nagoya": "whois.gmoregistry.net", - ".nyc": "whois.nic.nyc", - ".okinawa": "whois.gmoregistry.net", - ".qpon": "whois.nic.qpon", - ".sohu": "whois.gtld.knet.cn", - ".tokyo": "whois.nic.tokyo", - ".trade": "whois.nic.trade", - ".webcam": "whois.nic.webcam", - ".xn--rhqv96g": "whois.nic.xn--rhqv96g", - # The following is a bit hacky, but IANA won't return the right answer for example.com because it's a direct registration. - "example.com": "whois.verisign-grs.com" - } if rfc3490: if sys.version_info < (3, 0): @@ -45,40 +45,9 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals else: domain = encode(domain, "idna").decode("ascii") - if len(previous) == 0 and server == "": - # Root query - is_exception = False - for exception, exc_serv in exceptions.items(): - if domain.endswith(exception): - is_exception = True - target_server = exc_serv - break - if is_exception == False: - tld = get_tld(domain) - cached_server = server_cache.get_server(tld) - if cached_server is not None: - target_server = cached_server - else: - try: - target_server = get_root_server(domain) - server_cache.put_server(tld, target_server) - except Exception: - target_server = "" - else: - target_server = server - if target_server == "whois.jprs.jp": - request_domain = "%s/e" % domain # Suppress Japanese output - elif domain.endswith(".de") and (target_server == "whois.denic.de" or target_server == "de.whois-servers.net"): - 
 request_domain = "-T dn,ace %s" % domain # regional specific stuff - elif target_server == "whois.verisign-grs.com": - request_domain = "=%s" % domain # Avoid partial matches - else: - request_domain = domain - - if target_server and cool_down_tracker.try_to_use_server(target_server): - response = whois_request(request_domain, target_server) - else: - response = "" + target_server = get_target_server(domain, previous, server) + query = prepare_query(target_server, domain) + response = query_server(target_server, query) if never_cut: # If the caller has requested to 'never cut' responses, he will get the original response from the server (this is @@ -119,6 +88,76 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals return new_list +def query_server(whois_server, query): + """ + Send out the query, if the server is available. if the server is still in cool down, + return an empty string + :param whois_server: The WHOIS server to query + :param query: The query to send + :return: The result, or an empty string if the server is unavailable + """ + if whois_server and cool_down_tracker.try_to_use_server(whois_server): + return whois_request(query, whois_server) + else: + return "" + + +def prepare_query(whois_server, domain): + """ + Some WHOIS servers have a different way of querying. + This method returns an appropriate query for the WHOIS server + :param domain: The domain to query + :return: The fitting query + """ + if whois_server == "whois.jprs.jp": + return "%s/e" % domain # Suppress Japanese output + elif domain.endswith(".de") and (whois_server == "whois.denic.de" or whois_server == "de.whois-servers.net"): + return "-T dn,ace %s" % domain # regional specific stuff + elif whois_server == "whois.verisign-grs.com": + return "=%s" % domain # Avoid partial matches + else: + return domain + + +def get_target_server(domain, previous_results, given_server): + """ + Get the target server based on the current situation. 
+ :param domain: The domain to get the server for + :param previous_results: The previously acquired results, as a result of referrals + :param given_server: + :return: + """ + if len(previous_results) == 0 and given_server == "": + # Root query + for exception, exc_serv in exceptions.items(): + if domain.endswith(exception): + target_server = exc_serv + return target_server + + target_server = get_non_exception_server(domain) + return target_server + else: + return given_server + + +def get_non_exception_server(domain): + """ + Get a server that does not belong to the list of exceptions, + either by asking IANA or by looking in the cache + :param domain: The domain to get the WHOIS server for + :return: The WHOIS server to use + """ + tld = get_tld(domain) + cached_server = server_cache.get_server(tld) + if cached_server is not None: + target_server = cached_server + else: + target_server = get_root_server(domain) + server_cache.put_server(tld, target_server) + + return target_server + + def server_is_alive(server): response = subprocess.call(["ping", "-c 1", "-w2", server], stdout=open(os.devnull, "w"), stderr=subprocess.STDOUT) @@ -136,7 +175,7 @@ def get_root_server(domain): if match is None: continue return match.group(1) - raise shared.WhoisException("No root WHOIS server found for domain.") + return "" def whois_request(domain, server, port=43): diff --git a/pythonwhois/ratelimit/cool_down.py b/pythonwhois/ratelimit/cool_down.py index 5556777..371bc4d 100644 --- a/pythonwhois/ratelimit/cool_down.py +++ b/pythonwhois/ratelimit/cool_down.py @@ -1,24 +1,9 @@ -import ConfigParser import datetime +from pythonwhois.ratelimit.cool_down_config import CoolDownConfig from pythonwhois.ratelimit.cool_down_tracker import CoolDownTracker -def get_float_from_config(config, section, key, default=None): - """ - Get a value from the config if it exists, otherwise return the default value - :param config: The configuration to get the value from - :param section: The section to 
get the value from - :param key: The key that may or may not exist - :param default: The default value to return, which is None by default - :return: The value if it exists, else default - """ - if config.has_option(section, key): - return config.getfloat(section, key) - else: - return default - - class CoolDown: """ Handle the cool down period for asking a WHOIS server again @@ -55,7 +40,7 @@ def try_to_use_server(self, whois_server): if whois_server not in self.servers_on_cool_down: self.servers_on_cool_down[whois_server] = CoolDownTracker(self.default_cool_down_length) - self.servers_on_cool_down[whois_server].use_and_reset_cool_down() + self.servers_on_cool_down[whois_server].use_whois_server() return True def decrement_cool_downs(self): @@ -82,43 +67,8 @@ def set_cool_down_config(self, path_to_file): Tell the CoolDown instance of a configuration file, describing specific settings for certain WHOIS servers. This configuration will then be read and inserted into the cool down dictionary. - If the configuration contains a general section, this will be consumed and removed from the config instance - (not the file). This is done to keep all the configuration in one file, but to be able to easily loop - over all the WHOIS server sections. :param path_to_file: The path to the configuration file """ - config = ConfigParser.ConfigParser() - config.read(path_to_file) - config = self.consume_defaults_from_config(config) - self.apply_cool_down_config(config) - - def apply_cool_down_config(self, config): - """ - Read all the WHOIS server sections from the configuration and build - CoolDownTracker objects for them containing the read information. - These CoolDownTracker instances are then placed in servers_on_cool_down. 
- :param config: A configuration file with only WHOIS server sections - """ - for whois_server in config.sections(): - cool_down_length = get_float_from_config(config, whois_server, "cool_down_length", - self.default_cool_down_length) - max_requests_minute = get_float_from_config(config, whois_server, "max_requests_minute") - max_requests_hour = get_float_from_config(config, whois_server, "max_requests_hour") - max_requests_day = get_float_from_config(config, whois_server, "max_requests_day") - self.servers_on_cool_down[whois_server] = CoolDownTracker(cool_down_length, - max_requests_minute, - max_requests_hour, - max_requests_day) - - def consume_defaults_from_config(self, config): - """ - Gets the general settings from the config. Then removes them - and returns the modified config. - :param config: The config to obtain the default values from - :return: The modified config, without the 'general' section - """ - if config.has_section("general"): - self.default_cool_down_length = get_float_from_config(config, "general", "default_cool_down_length", - self.default_cool_down_length) - - return config + cool_down_config = CoolDownConfig(path_to_file, self.default_cool_down_length) + for whois_server in cool_down_config.get_sections(): + self.servers_on_cool_down[whois_server] = cool_down_config.get_cool_down_tracker_for_server(whois_server) diff --git a/pythonwhois/ratelimit/cool_down_config.py b/pythonwhois/ratelimit/cool_down_config.py new file mode 100644 index 0000000..c9fe104 --- /dev/null +++ b/pythonwhois/ratelimit/cool_down_config.py @@ -0,0 +1,79 @@ +import ConfigParser + +from pythonwhois.ratelimit.cool_down_tracker import CoolDownTracker + + +def get_float_from_config(config, section, key, default=None): + """ + Get a value from the config if it exists, otherwise return the default value + :param config: The configuration to get the value from + :param section: The section to get the value from + :param key: The key that may or may not exist + :param 
default: The default value to return, which is None by default + :return: The value if it exists, else default + """ + if config.has_option(section, key): + return config.getfloat(section, key) + else: + return default + + +class CoolDownConfig: + """ + Read and handle the contents of a configuration file for the cool down process. + """ + + def __init__(self, path_to_file, default_cool_down): + """ + Read the configuration file. + If the configuration contains a general section, this will be consumed and removed from the config instance + (not the file). This is done to keep all the configuration in one file, but to be able to easily loop + over all the WHOIS server sections. + :param path_to_file: The path to the configuration file + :param default_cool_down: The default value for the cool down length, in case it is not defined in the config + """ + self.config = ConfigParser.ConfigParser() + self.config.read(path_to_file) + + self.cool_down_length = default_cool_down + self.config = self.consume_defaults_from_config(self.config) + + def consume_defaults_from_config(self, config): + """ + Gets the general settings from the config. Then removes them + and returns the modified config. + :param config: The config to obtain the default values from + :return: The modified config, without the 'general' section + """ + if config.has_section("general"): + self.cool_down_length = get_float_from_config(config, "general", "default_cool_down_length") + config.remove_section("general") + return config + + def get_sections(self): + """ + Return a list of sections + :return: A list of sections + """ + return self.config.sections() + + def get_cool_down_tracker_for_server(self, whois_server): + """ + Create a new CoolDownTracker instance based on the contents of the configuration file. + If the configuration file does not have settings for this WHOIS server, a default CoolDownTracker with + the cool down length is returned. 
+ :param whois_server: The WHOIS server to build a CoolDownTracker for + :return: A CoolDownTracker instance based on the settings + """ + if self.config.has_section(whois_server): + cool_down_length = get_float_from_config(self.config, whois_server, "cool_down_length", + self.cool_down_length) + max_requests_minute = get_float_from_config(self.config, whois_server, "max_requests_minute") + max_requests_hour = get_float_from_config(self.config, whois_server, "max_requests_hour") + max_requests_day = get_float_from_config(self.config, whois_server, "max_requests_day") + return CoolDownTracker(cool_down_length, + max_requests_minute, + max_requests_hour, + max_requests_day) + + return CoolDownTracker(self.cool_down_length) diff --git a/pythonwhois/ratelimit/cool_down_tracker.py b/pythonwhois/ratelimit/cool_down_tracker.py index 9870adb..98ceca8 100644 --- a/pythonwhois/ratelimit/cool_down_tracker.py +++ b/pythonwhois/ratelimit/cool_down_tracker.py @@ -20,7 +20,7 @@ def __init__(self, cool_down_length, max_requests_minute=None, max_requests_hour self.request_count = 0 self.current_cool_down = 0 - def use_and_reset_cool_down(self): + def use_whois_server(self): """ Tell the tracker that the corresponding server is going to be used. It will set the cool down, based on the amount of requests that already have been made @@ -35,13 +35,13 @@ def use_and_reset_cool_down(self): else: self.current_cool_down = self.cool_down_length - def decrement_cool_down(self, decrement): + def decrement_cool_down(self, seconds): """ Decrement the current cooldown with the given value, implying that a given time has passed. 
- :param decrement: The value to decrement the current cool down value with + :param seconds: The seconds to decrement the current cool down value with """ - self.current_cool_down -= decrement + self.current_cool_down -= seconds def max_requests_reached(self, limit): """ diff --git a/test_pythonwhois/ratelimit/test_cool_down_tracker.py b/test_pythonwhois/ratelimit/test_cool_down_tracker.py index 19872bc..98ccdf5 100644 --- a/test_pythonwhois/ratelimit/test_cool_down_tracker.py +++ b/test_pythonwhois/ratelimit/test_cool_down_tracker.py @@ -10,11 +10,11 @@ class CoolDownTrackerTest(unittest.TestCase): def test_decrement(self): tracker_1 = CoolDownTracker(2) - tracker_1.use_and_reset_cool_down() + tracker_1.use_whois_server() self.assertEqual(tracker_1.current_cool_down, 2) tracker_2 = CoolDownTracker(5) - tracker_2.use_and_reset_cool_down() + tracker_2.use_whois_server() self.assertEqual(tracker_2.current_cool_down, 5) tracker_1.decrement_cool_down(1) @@ -25,27 +25,27 @@ def test_decrement(self): def test_minute_limit_reached(self): tracker = CoolDownTracker(1, max_requests_minute=5) for _ in range(5): - tracker.use_and_reset_cool_down() + tracker.use_whois_server() self.assertEqual(tracker.current_cool_down, 60) def test_hour_limit_reached(self): tracker = CoolDownTracker(1, max_requests_hour=10) for _ in range(10): - tracker.use_and_reset_cool_down() + tracker.use_whois_server() self.assertEqual(tracker.current_cool_down, 3600) def test_day_limit_reached(self): tracker = CoolDownTracker(1, max_requests_day=20) for _ in range(20): - tracker.use_and_reset_cool_down() + tracker.use_whois_server() self.assertEqual(tracker.current_cool_down, 86400) def test_should_use_day_limit(self): tracker = CoolDownTracker(1, max_requests_minute=5, max_requests_hour=13, max_requests_day=20) for _ in range(20): - tracker.use_and_reset_cool_down() + tracker.use_whois_server() self.assertEqual(tracker.current_cool_down, 86400) From 4d87efe48ef7b74305e24eea944c9acb3ccb81c6 Mon Sep 17 
00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Tue, 14 Jun 2016 10:06:18 +0200 Subject: [PATCH 26/40] REF: Made decrement_cool_downs a little clearer --- pythonwhois/ratelimit/cool_down.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pythonwhois/ratelimit/cool_down.py b/pythonwhois/ratelimit/cool_down.py index 371bc4d..a323a26 100644 --- a/pythonwhois/ratelimit/cool_down.py +++ b/pythonwhois/ratelimit/cool_down.py @@ -48,8 +48,8 @@ def decrement_cool_downs(self): Decrement all the cool downs with cool_down_time """ time_diff = self.get_time_difference() - for server, cool_down in self.servers_on_cool_down.iteritems(): - self.servers_on_cool_down[server].decrement_cool_down(time_diff) + for server, cool_down_tracker in self.servers_on_cool_down.iteritems(): + cool_down_tracker.decrement_cool_down(time_diff) def get_time_difference(self): """ From a01f6630da4e85e9c550d1e6f458136137244186 Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Tue, 14 Jun 2016 11:37:22 +0200 Subject: [PATCH 27/40] ADD: A holder for WHOIS responses. It contains information about the success of the retrieval. 
--- pythonwhois/net.py | 44 +++++++++++++++++++++----- pythonwhois/response/__init__.py | 0 pythonwhois/response/whois_response.py | 16 ++++++++++ 3 files changed, 52 insertions(+), 8 deletions(-) create mode 100644 pythonwhois/response/__init__.py create mode 100644 pythonwhois/response/whois_response.py diff --git a/pythonwhois/net.py b/pythonwhois/net.py index 5c7f2c2..0d77014 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -7,6 +7,9 @@ from pythonwhois.caching.whois_server_cache import server_cache from pythonwhois.ratelimit.cool_down import CoolDown +from pythonwhois.response.whois_response import WhoisResponse + +incomplete_result_message = "THE_WHOIS_ORACLE_INCOMPLETE_RESULT" cool_down_tracker = CoolDown() @@ -47,7 +50,8 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals target_server = get_target_server(domain, previous, server) query = prepare_query(target_server, domain) - response = query_server(target_server, query) + whois_response = query_server(target_server, query) + response = whois_response.response if never_cut: # If the caller has requested to 'never cut' responses, he will get the original response from the server (this is @@ -64,8 +68,15 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals if re.search("Domain Name: %s\n" % domain.upper(), record): response = record break - if never_cut == False: + if not never_cut: new_list = [response] + previous + + if whois_response.server_is_dead: + return build_return_value(with_server_list, new_list, server_list) + elif whois_response.request_failure or whois_response.cool_down_failure: + new_list = [incomplete_result_message] + previous + return build_return_value(with_server_list, new_list, server_list) + server_list.append(target_server) # Ignore redirects from registries who publish the registrar data themselves @@ -82,10 +93,25 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals return 
get_whois_raw(domain, referal_server, new_list, server_list=server_list, with_server_list=with_server_list) + return build_return_value(with_server_list, new_list, server_list) + + +def build_return_value(with_server_list, responses, server_list): + """ + Create a return value + :param with_server_list: Whether the server list should be returned as well + :param responses: The list of responses + :param server_list: The server list + :return: A list of responses without the empty ones, plus possibly a server list + """ + non_empty_responses = filter((lambda text: text is not ''), responses) + if len(non_empty_responses) == 0: + non_empty_responses = [''] + if with_server_list: - return (new_list, server_list) + return non_empty_responses, server_list else: - return new_list + return non_empty_responses def query_server(whois_server, query): @@ -99,7 +125,7 @@ def query_server(whois_server, query): if whois_server and cool_down_tracker.try_to_use_server(whois_server): return whois_request(query, whois_server) else: - return "" + return WhoisResponse(cool_down_failure=True) def prepare_query(whois_server, domain): @@ -169,7 +195,7 @@ def get_tld(domain): def get_root_server(domain): - data = whois_request(domain, "whois.iana.org") + data = whois_request(domain, "whois.iana.org").response or "" for line in [x.strip() for x in data.splitlines()]: match = re.match("refer:\s*([^\s]+)", line) if match is None: @@ -181,6 +207,7 @@ def get_root_server(domain): def whois_request(domain, server, port=43): try: sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.settimeout(10) sock.connect((server, port)) sock.send(("%s\r\n" % domain).encode("utf-8")) buff = b"" @@ -189,6 +216,7 @@ def whois_request(domain, server, port=43): if len(data) == 0: break buff += data - return buff.decode("utf-8", "replace") + return WhoisResponse(buff.decode("utf-8", "replace")) except Exception: - return "" + server_is_dead = not server_is_alive(server) + return 
 WhoisResponse(request_failure=True, server_is_dead=server_is_dead) diff --git a/pythonwhois/response/__init__.py b/pythonwhois/response/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pythonwhois/response/whois_response.py b/pythonwhois/response/whois_response.py new file mode 100644 index 0000000..5268797 --- /dev/null +++ b/pythonwhois/response/whois_response.py @@ -0,0 +1,16 @@ +class WhoisResponse: + """ + Holder class for WHOIS responses. Is capable of marking the retrieval as a failure. + """ + + def __init__(self, response=None, request_failure=False, cool_down_failure=False, server_is_dead=False): + """ + Hold the WHOIS response + :param response: The received response, if any + :param request_failure: If the request was a failure + :param cool_down_failure: Whether the server was unavailable due to a cool down or not + """ + self.response = response + self.request_failure = request_failure + self.cool_down_failure = cool_down_failure + self.server_is_dead = server_is_dead From d9aa27c6bc8007fb909bdfcde5577ad61fbd3f33 Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Wed, 15 Jun 2016 13:27:22 +0200 Subject: [PATCH 28/40] REF: Increased default cool down length from 1 second to 2 REF: Extracted the starting of a cool down to a separate method ADD: Can now check whether a rate limit has been exceeded or not FIX: If there are no results at all, parse.py simply returns an empty dictionary instead of crashing --- pythonwhois/__init__.py | 6 +++- pythonwhois/net.py | 38 +++++++++++++++------- pythonwhois/parse.py | 2 ++ pythonwhois/ratelimit/cool_down.py | 11 ++++++- pythonwhois/ratelimit/cool_down_tracker.py | 14 ++++++++ pythonwhois/response/whois_response.py | 19 +++++++++-- setup.py | 2 +- 7 files changed, 76 insertions(+), 16 deletions(-) diff --git a/pythonwhois/__init__.py b/pythonwhois/__init__.py index f8729e4..c56c784 100644 --- a/pythonwhois/__init__.py +++ b/pythonwhois/__init__.py @@ -6,8 +6,12 @@ def
get_whois(domain, normalized=[]): # Unlisted handles will be looked up on the last WHOIS server that was queried. This may be changed to also query # other servers in the future, if it turns out that there are cases where the last WHOIS server in the chain doesn't # actually hold the handle contact details, but another WHOIS server in the chain does. + if len(server_list) > 0: + handle_server = server_list[-1] + else: + handle_server = "" return parse.parse_raw_whois(raw_data, normalized=normalized, never_query_handles=False, - handle_server=server_list[-1]) + handle_server=handle_server) def set_persistent_cache(path_to_cache): diff --git a/pythonwhois/net.py b/pythonwhois/net.py index 0d77014..3c01132 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -7,7 +7,7 @@ from pythonwhois.caching.whois_server_cache import server_cache from pythonwhois.ratelimit.cool_down import CoolDown -from pythonwhois.response.whois_response import WhoisResponse +from pythonwhois.response.whois_response import RawWhoisResponse incomplete_result_message = "THE_WHOIS_ORACLE_INCOMPLETE_RESULT" @@ -72,8 +72,14 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals new_list = [response] + previous if whois_response.server_is_dead: + # That's probably as far as we can go, the road ends here return build_return_value(with_server_list, new_list, server_list) - elif whois_response.request_failure or whois_response.cool_down_failure: + elif whois_response.request_failure: + # Mark this result as incomplete, so we can try again later + new_list = [incomplete_result_message] + previous + cool_down_tracker.warn_limit_exceeded(target_server) + return build_return_value(with_server_list, new_list, server_list) + elif whois_response.cool_down_failure: new_list = [incomplete_result_message] + previous return build_return_value(with_server_list, new_list, server_list) @@ -104,9 +110,7 @@ def build_return_value(with_server_list, responses, server_list): :param 
server_list: The server list :return: A list of responses without the empty ones, plus possibly a server list """ - non_empty_responses = filter((lambda text: text is not ''), responses) - if len(non_empty_responses) == 0: - non_empty_responses = [''] + non_empty_responses = filter((lambda text: text is not '' and text is not None), responses) if with_server_list: return non_empty_responses, server_list @@ -117,15 +121,15 @@ def build_return_value(with_server_list, responses, server_list): def query_server(whois_server, query): """ Send out the query, if the server is available. if the server is still in cool down, - return an empty string + return a RawWhoisResponse instance describing the failure :param whois_server: The WHOIS server to query :param query: The query to send - :return: The result, or an empty string if the server is unavailable + :return: A RawWhoisResponse containing either the response or the reason of failure """ if whois_server and cool_down_tracker.try_to_use_server(whois_server): return whois_request(query, whois_server) else: - return WhoisResponse(cool_down_failure=True) + return RawWhoisResponse(cool_down_failure=True) def prepare_query(whois_server, domain): @@ -151,7 +155,7 @@ def get_target_server(domain, previous_results, given_server): :param domain: The domain to get the server for :param previous_results: The previously acquired results, as a result of referrals :param given_server: - :return: + :return: The server to use """ if len(previous_results) == 0 and given_server == "": # Root query @@ -195,6 +199,11 @@ def get_tld(domain): def get_root_server(domain): + """ + Find the WHOIS server for a given domain + :param domain: The domain to find a WHOIS server for + :return: The WHOIS server, or an empty string if no server is found + """ data = whois_request(domain, "whois.iana.org").response or "" for line in [x.strip() for x in data.splitlines()]: match = re.match("refer:\s*([^\s]+)", line) @@ -205,6 +214,13 @@ def 
get_root_server(domain): def whois_request(domain, server, port=43): + """ + Request WHOIS information. Has a timeout of 10 seconds + :param domain: The domain to request WHOIS information for + :param server: The WHOIS server to use + :param port: The port to use, 43 by default + :return: A WHOIS response containing either the result, or containing information about the failure + """ try: sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.settimeout(10) @@ -216,7 +232,7 @@ def whois_request(domain, server, port=43): if len(data) == 0: break buff += data - return WhoisResponse(buff.decode("utf-8", "replace")) + return RawWhoisResponse(buff.decode("utf-8", "replace")) except Exception: server_is_dead = not server_is_alive(server) - return WhoisResponse(request_failure=True, server_is_dead=server_is_dead) + return RawWhoisResponse(request_failure=True, server_is_dead=server_is_dead) diff --git a/pythonwhois/parse.py b/pythonwhois/parse.py index 025211c..1125587 100644 --- a/pythonwhois/parse.py +++ b/pythonwhois/parse.py @@ -702,6 +702,8 @@ def filter_characters(string, delete_characters): def parse_raw_whois(raw_data, normalized=None, never_query_handles=True, handle_server=""): + if len(raw_data) == 0: + return {} normalized = normalized or [] data = {} diff --git a/pythonwhois/ratelimit/cool_down.py b/pythonwhois/ratelimit/cool_down.py index a323a26..98c5929 100644 --- a/pythonwhois/ratelimit/cool_down.py +++ b/pythonwhois/ratelimit/cool_down.py @@ -14,7 +14,7 @@ def __init__(self): Creates a dictionary for storing cool downs. 
""" self.servers_on_cool_down = {} - self.default_cool_down_length = 1.0 + self.default_cool_down_length = 2.0 self.last_request_time = datetime.datetime.now() def can_use_server(self, whois_server): @@ -51,6 +51,15 @@ def decrement_cool_downs(self): for server, cool_down_tracker in self.servers_on_cool_down.iteritems(): cool_down_tracker.decrement_cool_down(time_diff) + def warn_limit_exceeded(self, whois_server): + """ + Warn the CoolDown instance of an exceeded limit for a WHOIS server. + The CoolDown instance will then make sure that the cool down for the WHOIS server + will be longer next time + :param whois_server: The WHOIS server the limit has been exceeded for + """ + self.servers_on_cool_down[whois_server].double_cool_down() + def get_time_difference(self): """ Get the difference in time between te last time this was called diff --git a/pythonwhois/ratelimit/cool_down_tracker.py b/pythonwhois/ratelimit/cool_down_tracker.py index 98ceca8..cf42927 100644 --- a/pythonwhois/ratelimit/cool_down_tracker.py +++ b/pythonwhois/ratelimit/cool_down_tracker.py @@ -26,6 +26,12 @@ def use_whois_server(self): It will set the cool down, based on the amount of requests that already have been made """ self.request_count += 1 + self.start_cool_down() + + def start_cool_down(self): + """ + Start a new cool_down + """ if self.max_requests_reached(self.max_requests_day): self.current_cool_down = 86400 elif self.max_requests_reached(self.max_requests_hour): @@ -50,3 +56,11 @@ def max_requests_reached(self, limit): :return: True if the limit has been reached, false if not """ return limit is not None and self.request_count % limit == 0 + + def double_cool_down(self): + """ + Double the cool down length, as in, the cool down length that is always used, + not the current cool down that is going on. 
+ """ + self.cool_down_length *= 2 + self.start_cool_down() diff --git a/pythonwhois/response/whois_response.py b/pythonwhois/response/whois_response.py index 5268797..8c0942f 100644 --- a/pythonwhois/response/whois_response.py +++ b/pythonwhois/response/whois_response.py @@ -1,9 +1,9 @@ -class WhoisResponse: +class RawWhoisResponse: """ Holder class for WHOIS responses. Is capable of marking the retrieval as a failure. """ - def __init__(self, response=None, request_failure=False, cool_down_failure=False, server_is_dead=False): + def __init__(self, response="", request_failure=False, cool_down_failure=False, server_is_dead=False): """ Hold the WHOIS response :param response: The received response, if any @@ -14,3 +14,18 @@ def __init__(self, response=None, request_failure=False, cool_down_failure=False self.request_failure = request_failure self.cool_down_failure = cool_down_failure self.server_is_dead = server_is_dead + + if len(response) > 0: + self.request_failure = self.check_for_exceeded_limit() + + def check_for_exceeded_limit(self): + """ + Check whether the limit has been exceeded. This is done by + looking at the size of the response. If it has less than 4 lines, + it is probably not a useful response and most likely a message about spamming + the WHOIS server + :return: True if the message is really short, false if not + """ + if self.response is not None and len(self.response.splitlines()) < 4: + return True + return False diff --git a/setup.py b/setup.py index 11b4f08..b02bc83 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ description='Module for retrieving and parsing the WHOIS data for a domain. Supports most domains. 
No dependencies.', author='Sander ten Hoor, original by Sven Slootweg', url='https://github.com/MasterFenrir/whois-oracle', - packages=['pythonwhois', 'pythonwhois.caching', 'pythonwhois.ratelimit'], + packages=['pythonwhois', 'pythonwhois.caching', 'pythonwhois.ratelimit', 'pythonwhois.response'], package_data={"pythonwhois": ["*.dat"]}, install_requires=['argparse'], provides=['pythonwhois'], From 744ee349b0f7161af9e7aa2cc067616a78066be9 Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Wed, 15 Jun 2016 14:05:33 +0200 Subject: [PATCH 29/40] REF: Renamed whois_response.py to raw_whois_response.py REF: Made timeout an argument, but with a default value --- pythonwhois/net.py | 11 ++++++----- .../{whois_response.py => raw_whois_response.py} | 0 2 files changed, 6 insertions(+), 5 deletions(-) rename pythonwhois/response/{whois_response.py => raw_whois_response.py} (100%) diff --git a/pythonwhois/net.py b/pythonwhois/net.py index 3c01132..83ccdee 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -7,7 +7,7 @@ from pythonwhois.caching.whois_server_cache import server_cache from pythonwhois.ratelimit.cool_down import CoolDown -from pythonwhois.response.whois_response import RawWhoisResponse +from pythonwhois.response.raw_whois_response import RawWhoisResponse incomplete_result_message = "THE_WHOIS_ORACLE_INCOMPLETE_RESULT" @@ -75,7 +75,7 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals # That's probably as far as we can go, the road ends here return build_return_value(with_server_list, new_list, server_list) elif whois_response.request_failure: - # Mark this result as incomplete, so we can try again later + # Mark this result as incomplete, so we can try again later but still use the data if we have any new_list = [incomplete_result_message] + previous cool_down_tracker.warn_limit_exceeded(target_server) return build_return_value(with_server_list, new_list, server_list) @@ -213,17 +213,18 @@ def 
get_root_server(domain): return "" -def whois_request(domain, server, port=43): +def whois_request(domain, server, port=43, timeout=10): """ - Request WHOIS information. Has a timeout of 10 seconds + Request WHOIS information. :param domain: The domain to request WHOIS information for :param server: The WHOIS server to use :param port: The port to use, 43 by default + :param timeout: The length of the time out, 10 seconds by default :return: A WHOIS response containing either the result, or containing information about the failure """ try: sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.settimeout(10) + sock.settimeout(timeout) sock.connect((server, port)) sock.send(("%s\r\n" % domain).encode("utf-8")) buff = b"" diff --git a/pythonwhois/response/whois_response.py b/pythonwhois/response/raw_whois_response.py similarity index 100% rename from pythonwhois/response/whois_response.py rename to pythonwhois/response/raw_whois_response.py From 2d5e2cac71462307ef240c7e2cabf589a7dcb57c Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Wed, 15 Jun 2016 17:37:30 +0200 Subject: [PATCH 30/40] REF: Removed the fix for parsing empty responses and moved it to the whois application that uses this REF: Processed Wytse his comments --- pythonwhois/net.py | 6 +++--- pythonwhois/parse.py | 2 -- pythonwhois/ratelimit/cool_down.py | 6 +++--- pythonwhois/response/raw_whois_response.py | 10 ++++------ 4 files changed, 10 insertions(+), 14 deletions(-) diff --git a/pythonwhois/net.py b/pythonwhois/net.py index 83ccdee..4e516a1 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -79,7 +79,7 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals new_list = [incomplete_result_message] + previous cool_down_tracker.warn_limit_exceeded(target_server) return build_return_value(with_server_list, new_list, server_list) - elif whois_response.cool_down_failure: + elif whois_response.still_in_cool_down: new_list = 
[incomplete_result_message] + previous return build_return_value(with_server_list, new_list, server_list) @@ -110,7 +110,7 @@ def build_return_value(with_server_list, responses, server_list): :param server_list: The server list :return: A list of responses without the empty ones, plus possibly a server list """ - non_empty_responses = filter((lambda text: text is not '' and text is not None), responses) + non_empty_responses = filter((lambda text: text), responses) if with_server_list: return non_empty_responses, server_list @@ -129,7 +129,7 @@ def query_server(whois_server, query): if whois_server and cool_down_tracker.try_to_use_server(whois_server): return whois_request(query, whois_server) else: - return RawWhoisResponse(cool_down_failure=True) + return RawWhoisResponse(still_in_cool_down=True) def prepare_query(whois_server, domain): diff --git a/pythonwhois/parse.py b/pythonwhois/parse.py index 1125587..025211c 100644 --- a/pythonwhois/parse.py +++ b/pythonwhois/parse.py @@ -702,8 +702,6 @@ def filter_characters(string, delete_characters): def parse_raw_whois(raw_data, normalized=None, never_query_handles=True, handle_server=""): - if len(raw_data) == 0: - return {} normalized = normalized or [] data = {} diff --git a/pythonwhois/ratelimit/cool_down.py b/pythonwhois/ratelimit/cool_down.py index 98c5929..b658864 100644 --- a/pythonwhois/ratelimit/cool_down.py +++ b/pythonwhois/ratelimit/cool_down.py @@ -14,7 +14,7 @@ def __init__(self): Creates a dictionary for storing cool downs. 
""" self.servers_on_cool_down = {} - self.default_cool_down_length = 2.0 + self.default_cool_down_seconds = 2.0 self.last_request_time = datetime.datetime.now() def can_use_server(self, whois_server): @@ -39,7 +39,7 @@ def try_to_use_server(self, whois_server): return False if whois_server not in self.servers_on_cool_down: - self.servers_on_cool_down[whois_server] = CoolDownTracker(self.default_cool_down_length) + self.servers_on_cool_down[whois_server] = CoolDownTracker(self.default_cool_down_seconds) self.servers_on_cool_down[whois_server].use_whois_server() return True @@ -78,6 +78,6 @@ def set_cool_down_config(self, path_to_file): the cool down dictionary. :param path_to_file: The path to the configuration file """ - cool_down_config = CoolDownConfig(path_to_file, self.default_cool_down_length) + cool_down_config = CoolDownConfig(path_to_file, self.default_cool_down_seconds) for whois_server in cool_down_config.get_sections(): self.servers_on_cool_down[whois_server] = cool_down_config.get_cool_down_tracker_for_server(whois_server) diff --git a/pythonwhois/response/raw_whois_response.py b/pythonwhois/response/raw_whois_response.py index 8c0942f..e0c3a50 100644 --- a/pythonwhois/response/raw_whois_response.py +++ b/pythonwhois/response/raw_whois_response.py @@ -3,16 +3,16 @@ class RawWhoisResponse: Holder class for WHOIS responses. Is capable of marking the retrieval as a failure. 
""" - def __init__(self, response="", request_failure=False, cool_down_failure=False, server_is_dead=False): + def __init__(self, response="", request_failure=False, still_in_cool_down=False, server_is_dead=False): """ Hold the WHOIS response :param response: The received response, if any :param request_failure: If the request was a failure - :param cool_down_failure: Whether the server was unavailable due to a cool down or not + :param still_in_cool_down: Whether the server was unavailable due to a cool down or not """ self.response = response self.request_failure = request_failure - self.cool_down_failure = cool_down_failure + self.still_in_cool_down = still_in_cool_down self.server_is_dead = server_is_dead if len(response) > 0: @@ -26,6 +26,4 @@ def check_for_exceeded_limit(self): the WHOIS server :return: True if the message is really short, false if not """ - if self.response is not None and len(self.response.splitlines()) < 4: - return True - return False + return self.response is not None and len(self.response.splitlines()) < 4 From b0f201e9797179e61c31076009d9750d1ee4253e Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Thu, 16 Jun 2016 10:01:18 +0200 Subject: [PATCH 31/40] ENH: Wording in a comment --- pythonwhois/ratelimit/cool_down_tracker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pythonwhois/ratelimit/cool_down_tracker.py b/pythonwhois/ratelimit/cool_down_tracker.py index cf42927..44a5650 100644 --- a/pythonwhois/ratelimit/cool_down_tracker.py +++ b/pythonwhois/ratelimit/cool_down_tracker.py @@ -60,7 +60,7 @@ def max_requests_reached(self, limit): def double_cool_down(self): """ Double the cool down length, as in, the cool down length that is always used, - not the current cool down that is going on. + not the current cool down that happening. 
""" self.cool_down_length *= 2 self.start_cool_down() From 6300274d013dd0eb9065280d8e547635084ec095 Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Fri, 17 Jun 2016 11:08:49 +0200 Subject: [PATCH 32/40] FIX: Fixed failure for gg domains --- pythonwhois/parse.py | 18 ++++++++++-------- setup.py | 3 ++- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pythonwhois/parse.py b/pythonwhois/parse.py index 025211c..ac06852 100644 --- a/pythonwhois/parse.py +++ b/pythonwhois/parse.py @@ -786,16 +786,18 @@ def parse_raw_whois(raw_data, normalized=None, never_query_handles=True, handle_ if match is not None: data["registrar"] = [match.group(1).strip()] match = re.search("(?:Domain nameservers|Name servers):([\s\S]*?\n)\n", segment) - if match is not None: + if match: chunk = match.group(1) for match in re.findall("\s+?(.+)\n", chunk): - match = match.split()[0] - # Prevent nameserver aliases from being picked up. - if not match.startswith("[") and not match.endswith("]"): - try: - data["nameservers"].append(match.strip()) - except KeyError as e: - data["nameservers"] = [match.strip()] + match = match.split() + if match: + match = match[0] + # Prevent nameserver aliases from being picked up. + if not match.startswith("[") and not match.endswith("]"): + try: + data["nameservers"].append(match.strip()) + except KeyError as e: + data["nameservers"] = [match.strip()] # The .ie WHOIS server puts ambiguous status information in an unhelpful order match = re.search('ren-status:\s*(.+)', segment) if match is not None: diff --git a/setup.py b/setup.py index b02bc83..be9e24e 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,9 @@ from setuptools import setup setup(name='whois-oracle', - version='1.0.3', + version='1.1.1', description='Module for retrieving and parsing the WHOIS data for a domain. Supports most domains. 
No dependencies.', + keywords='whois cool down', author='Sander ten Hoor, original by Sven Slootweg', url='https://github.com/MasterFenrir/whois-oracle', packages=['pythonwhois', 'pythonwhois.caching', 'pythonwhois.ratelimit', 'pythonwhois.response'], From aaa7a6cba8d280f1e53d1b4498387180be0bdb0e Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Fri, 17 Jun 2016 13:08:42 +0200 Subject: [PATCH 33/40] REF: Changed the default cooldown to 4 seconds REF: Changed the time out for a WHOIS request to 3 seconds --- pythonwhois/net.py | 4 ++-- pythonwhois/ratelimit/cool_down.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pythonwhois/net.py b/pythonwhois/net.py index 4e516a1..6449d7b 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -213,13 +213,13 @@ def get_root_server(domain): return "" -def whois_request(domain, server, port=43, timeout=10): +def whois_request(domain, server, port=43, timeout=3): """ Request WHOIS information. :param domain: The domain to request WHOIS information for :param server: The WHOIS server to use :param port: The port to use, 43 by default - :param timeout: The length of the time out, 10 seconds by default + :param timeout: The length of the time out, 3 seconds by default :return: A WHOIS response containing either the result, or containing information about the failure """ try: diff --git a/pythonwhois/ratelimit/cool_down.py b/pythonwhois/ratelimit/cool_down.py index b658864..0830626 100644 --- a/pythonwhois/ratelimit/cool_down.py +++ b/pythonwhois/ratelimit/cool_down.py @@ -14,7 +14,7 @@ def __init__(self): Creates a dictionary for storing cool downs. 
""" self.servers_on_cool_down = {} - self.default_cool_down_seconds = 2.0 + self.default_cool_down_seconds = 4.0 self.last_request_time = datetime.datetime.now() def can_use_server(self, whois_server): @@ -79,5 +79,6 @@ def set_cool_down_config(self, path_to_file): :param path_to_file: The path to the configuration file """ cool_down_config = CoolDownConfig(path_to_file, self.default_cool_down_seconds) + self.default_cool_down_seconds = cool_down_config.cool_down_length for whois_server in cool_down_config.get_sections(): self.servers_on_cool_down[whois_server] = cool_down_config.get_cool_down_tracker_for_server(whois_server) From 4a44d4d30c8c03f083e0bb1dd3a22ffc44047cab Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Fri, 17 Jun 2016 13:35:34 +0200 Subject: [PATCH 34/40] REF: Version increase --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index be9e24e..31e2e80 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup setup(name='whois-oracle', - version='1.1.1', + version='1.1.2', description='Module for retrieving and parsing the WHOIS data for a domain. Supports most domains. No dependencies.', keywords='whois cool down', author='Sander ten Hoor, original by Sven Slootweg', From 3092c8aa862637751044bc6b229cb77b280b1b96 Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Fri, 17 Jun 2016 14:32:25 +0200 Subject: [PATCH 35/40] DEL Checking whether the referral server is alive now only happens in case of time out, because some servers block pinging which would skew the results. 
--- pythonwhois/net.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pythonwhois/net.py b/pythonwhois/net.py index 6449d7b..4684b1c 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -93,7 +93,7 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals if match is not None: referal_server = match.group(2) if referal_server != server and "://" not in referal_server \ - and "www." not in referal_server and server_is_alive(referal_server): + and "www." not in referal_server: # We want to ignore anything non-WHOIS (eg. HTTP) for now, and servers that are not reachable # Referal to another WHOIS server... return get_whois_raw(domain, referal_server, new_list, server_list=server_list, From 08f9be9da281256270fed27d103c79ede4078021 Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Fri, 17 Jun 2016 15:17:34 +0200 Subject: [PATCH 36/40] FIX: postal -> postalcode --- pythonwhois/parse.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pythonwhois/parse.py b/pythonwhois/parse.py index ac06852..9c46b99 100644 --- a/pythonwhois/parse.py +++ b/pythonwhois/parse.py @@ -524,7 +524,7 @@ def allow_trailing_comma_dict(regexes): "street": "Registrant[ \t\S]*Street:(?P<street>.*)", "city": "Registrant[ \t\S]*City:(?P<city>.*)", "state": "Registrant[ \t\S]*State:(?P<state>.*)", - "postalcode": "Registrant[ \t\S]*Postal Code:(?P<postal>.*)", + "postalcode": "Registrant[ \t\S]*Postal Code:(?P<postalcode>.*)", "country": "Registrant[ \t\S]*Country:(?P<country>.*)", "phone": "Registrant[ \t\S]*Phone:(?P<phone>.*)", "fax": "Registrant[ \t\S]*Fax:(?P<fax>.*)", @@ -538,7 +538,7 @@ def allow_trailing_comma_dict(regexes): "street": "Admin[ \t\S]*Street:(?P<street>.*)", "city": "Admin[ \t\S]*City:(?P<city>.*)", "state": "Admin[ \t\S]*State:(?P<state>.*)", - "postalcode": "Admin[ \t\S]*Postal Code:(?P<postal>.*)", + "postalcode": "Admin[ \t\S]*Postal Code:(?P<postalcode>.*)", "country": "Admin[ 
\t\S]*Country:(?P<country>.*)", "phone": "Admin[ \t\S]*Phone:(?P<phone>.*)", "fax": "Admin[ \t\S]*Fax:(?P<fax>.*)", @@ -552,7 +552,7 @@ def allow_trailing_comma_dict(regexes): "street": "Billing[ \t\S]*Street:(?P<street>.*)", "city": "Billing[ \t\S]*City:(?P<city>.*)", "state": "Billing[ \t\S]*State:(?P<state>.*)", - "postalcode": "Billing[ \t\S]*Postal Code:(?P<postal>.*)", + "postalcode": "Billing[ \t\S]*Postal Code:(?P<postalcode>.*)", "country": "Billing[ \t\S]*Country:(?P<country>.*)", "phone": "Billing[ \t\S]*Phone:(?P<phone>.*)", "fax": "Billing[ \t\S]*Fax:(?P<fax>.*)", @@ -566,7 +566,7 @@ def allow_trailing_comma_dict(regexes): "street": "Tech[ \t\S]*Street:(?P<street>.*)", "city": "Tech[ \t\S]*City:(?P<city>.*)", "state": "Tech[ \t\S]*State:(?P<state>.*)", - "postalcode": "Tech[ \t\S]*Postal Code:(?P<postal>.*)", + "postalcode": "Tech[ \t\S]*Postal Code:(?P<postalcode>.*)", "country": "Tech[ \t\S]*Country:(?P<country>.*)", "phone": "Tech[ \t\S]*Phone:(?P<phone>.*)", "fax": "Tech[ \t\S]*Fax:(?P<fax>.*)", From 91bd7107150eddf11b7e3ee8fc08d7bb0b6ec43a Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Mon, 20 Jun 2016 11:03:42 +0200 Subject: [PATCH 37/40] REF: Changed the increase in cool down rate from 2 to 1.5 ADD: If a limit is reached but the WHOIS server has already not been used for that time (say, the minute limit is reached but the server hasn't been used for a minute either) the cool down will be normal instead --- pythonwhois/ratelimit/cool_down.py | 2 +- pythonwhois/ratelimit/cool_down_tracker.py | 14 ++++++++------ setup.py | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/pythonwhois/ratelimit/cool_down.py b/pythonwhois/ratelimit/cool_down.py index 0830626..81d5df4 100644 --- a/pythonwhois/ratelimit/cool_down.py +++ b/pythonwhois/ratelimit/cool_down.py @@ -58,7 +58,7 @@ def warn_limit_exceeded(self, whois_server): will be longer next time :param whois_server: The WHOIS server the limit has been exceeded for 
""" - self.servers_on_cool_down[whois_server].double_cool_down() + self.servers_on_cool_down[whois_server].increase_cool_down() def get_time_difference(self): """ diff --git a/pythonwhois/ratelimit/cool_down_tracker.py b/pythonwhois/ratelimit/cool_down_tracker.py index 44a5650..a1f17b0 100644 --- a/pythonwhois/ratelimit/cool_down_tracker.py +++ b/pythonwhois/ratelimit/cool_down_tracker.py @@ -32,11 +32,12 @@ def start_cool_down(self): """ Start a new cool_down """ - if self.max_requests_reached(self.max_requests_day): + time_passed = self.cool_down_length - self.current_cool_down + if time_passed < 86400 and self.max_requests_reached(self.max_requests_day): self.current_cool_down = 86400 - elif self.max_requests_reached(self.max_requests_hour): + elif time_passed < 3600 and self.max_requests_reached(self.max_requests_hour): self.current_cool_down = 3600 - elif self.max_requests_reached(self.max_requests_minute): + elif time_passed < 60 and self.max_requests_reached(self.max_requests_minute): self.current_cool_down = 60 else: self.current_cool_down = self.cool_down_length @@ -57,10 +58,11 @@ def max_requests_reached(self, limit): """ return limit is not None and self.request_count % limit == 0 - def double_cool_down(self): + def increase_cool_down(self): """ - Double the cool down length, as in, the cool down length that is always used, + Increase the cool down length, as in, the cool down length that is always used, not the current cool down that happening. + The cool down length is multiplied by 1.5 """ - self.cool_down_length *= 2 + self.cool_down_length *= 1.5 self.start_cool_down() diff --git a/setup.py b/setup.py index 31e2e80..ccdb824 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup setup(name='whois-oracle', - version='1.1.2', + version='1.1.3', description='Module for retrieving and parsing the WHOIS data for a domain. Supports most domains. 
No dependencies.', keywords='whois cool down', author='Sander ten Hoor, original by Sven Slootweg', From 9842f54aab172521c15528e2b7b5b7a9333b7079 Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Mon, 20 Jun 2016 20:30:00 +0200 Subject: [PATCH 38/40] ADD: WhoisResult. Contains the list of responses, whether the list is complete and whether there is a WHOIS server available. This is what is returned from get_whois_raw REF: When no WHOIS server is available, just return with a WhoisResult instance describing it --- pythonwhois/__init__.py | 8 ++--- pythonwhois/net.py | 33 ++++++++++--------- ...ois_response.py => raw_response_holder.py} | 2 +- pythonwhois/response/whois_results.py | 12 +++++++ whois-oracle | 4 ++- 5 files changed, 37 insertions(+), 22 deletions(-) rename pythonwhois/response/{raw_whois_response.py => raw_response_holder.py} (97%) create mode 100644 pythonwhois/response/whois_results.py diff --git a/pythonwhois/__init__.py b/pythonwhois/__init__.py index c56c784..00ee715 100644 --- a/pythonwhois/__init__.py +++ b/pythonwhois/__init__.py @@ -2,15 +2,15 @@ def get_whois(domain, normalized=[]): - raw_data, server_list = net.get_whois_raw(domain, with_server_list=True) + final_result = net.get_whois_raw(domain, with_server_list=True) # Unlisted handles will be looked up on the last WHOIS server that was queried. This may be changed to also query # other servers in the future, if it turns out that there are cases where the last WHOIS server in the chain doesn't # actually hold the handle contact details, but another WHOIS server in the chain does. 
- if len(server_list) > 0: - handle_server = server_list[-1] + if len(final_result.server_list) > 0: + handle_server = final_result.server_list[-1] else: handle_server = "" - return parse.parse_raw_whois(raw_data, normalized=normalized, never_query_handles=False, + return parse.parse_raw_whois(final_result.responses, normalized=normalized, never_query_handles=False, handle_server=handle_server) diff --git a/pythonwhois/net.py b/pythonwhois/net.py index 4684b1c..65bf74e 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -7,9 +7,8 @@ from pythonwhois.caching.whois_server_cache import server_cache from pythonwhois.ratelimit.cool_down import CoolDown -from pythonwhois.response.raw_whois_response import RawWhoisResponse - -incomplete_result_message = "THE_WHOIS_ORACLE_INCOMPLETE_RESULT" +from pythonwhois.response.raw_response_holder import RawResponseHolder +from pythonwhois.response.whois_results import WhoisResult cool_down_tracker = CoolDown() @@ -49,6 +48,8 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals domain = encode(domain, "idna").decode("ascii") target_server = get_target_server(domain, previous, server) + if not target_server: + return build_return_value(with_server_list, [], server_list, True, False) query = prepare_query(target_server, domain) whois_response = query_server(target_server, query) response = whois_response.response @@ -73,15 +74,13 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals if whois_response.server_is_dead: # That's probably as far as we can go, the road ends here - return build_return_value(with_server_list, new_list, server_list) + return build_return_value(with_server_list, new_list, server_list, True, True, ) elif whois_response.request_failure: # Mark this result as incomplete, so we can try again later but still use the data if we have any - new_list = [incomplete_result_message] + previous cool_down_tracker.warn_limit_exceeded(target_server) - return 
build_return_value(with_server_list, new_list, server_list) + return build_return_value(with_server_list, new_list, server_list, False, True) elif whois_response.still_in_cool_down: - new_list = [incomplete_result_message] + previous - return build_return_value(with_server_list, new_list, server_list) + return build_return_value(with_server_list, new_list, server_list, False, True) server_list.append(target_server) @@ -92,9 +91,9 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals re.IGNORECASE) if match is not None: referal_server = match.group(2) - if referal_server != server and "://" not in referal_server \ + if referal_server != target_server and "://" not in referal_server \ and "www." not in referal_server: - # We want to ignore anything non-WHOIS (eg. HTTP) for now, and servers that are not reachable + # We want to ignore anything non-WHOIS (eg. HTTP) for now # Referal to another WHOIS server... return get_whois_raw(domain, referal_server, new_list, server_list=server_list, with_server_list=with_server_list) @@ -102,20 +101,22 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals return build_return_value(with_server_list, new_list, server_list) -def build_return_value(with_server_list, responses, server_list): +def build_return_value(with_server_list, responses, server_list, complete=True, whois_server_available=True): """ Create a return value :param with_server_list: Whether the server list should be returned as well :param responses: The list of responses :param server_list: The server list + :param complete: Whether the result was complete or not + :param whois_server_available: Whether there was a WHOIS server available :return: A list of responses without the empty ones, plus possibly a server list """ non_empty_responses = filter((lambda text: text), responses) if with_server_list: - return non_empty_responses, server_list + return WhoisResult(non_empty_responses, complete, 
whois_server_available, server_list) else: - return non_empty_responses + return WhoisResult(non_empty_responses, complete, whois_server_available) def query_server(whois_server, query): @@ -129,7 +130,7 @@ def query_server(whois_server, query): if whois_server and cool_down_tracker.try_to_use_server(whois_server): return whois_request(query, whois_server) else: - return RawWhoisResponse(still_in_cool_down=True) + return RawResponseHolder(still_in_cool_down=True) def prepare_query(whois_server, domain): @@ -233,7 +234,7 @@ def whois_request(domain, server, port=43, timeout=3): if len(data) == 0: break buff += data - return RawWhoisResponse(buff.decode("utf-8", "replace")) + return RawResponseHolder(buff.decode("utf-8", "replace")) except Exception: server_is_dead = not server_is_alive(server) - return RawWhoisResponse(request_failure=True, server_is_dead=server_is_dead) + return RawResponseHolder(request_failure=True, server_is_dead=server_is_dead) diff --git a/pythonwhois/response/raw_whois_response.py b/pythonwhois/response/raw_response_holder.py similarity index 97% rename from pythonwhois/response/raw_whois_response.py rename to pythonwhois/response/raw_response_holder.py index e0c3a50..fd265e6 100644 --- a/pythonwhois/response/raw_whois_response.py +++ b/pythonwhois/response/raw_response_holder.py @@ -1,4 +1,4 @@ -class RawWhoisResponse: +class RawResponseHolder: """ Holder class for WHOIS responses. Is capable of marking the retrieval as a failure. """ diff --git a/pythonwhois/response/whois_results.py b/pythonwhois/response/whois_results.py new file mode 100644 index 0000000..6d0fcde --- /dev/null +++ b/pythonwhois/response/whois_results.py @@ -0,0 +1,12 @@ +class WhoisResult: + """ + Holder class for the final results. This includes all the retrieved WHOIS responses, + whether this is the complete list of responses available and whether there is a WHOIS + server available at all. 
+ """ + + def __init__(self, responses, complete=True, whois_server_available=True, server_list=None): + self.responses = responses + self.complete = complete + self.whois_server_available = whois_server_available + self.server_list = server_list diff --git a/whois-oracle b/whois-oracle index d4074e3..c0dcd88 100755 --- a/whois-oracle +++ b/whois-oracle @@ -32,7 +32,9 @@ def json_fallback(obj): if args.file is None: - data, server_list = pythonwhois.net.get_whois_raw(args.domain[0], with_server_list=True) + final_result = pythonwhois.net.get_whois_raw(args.domain[0], with_server_list=True) + data = final_result.responses + server_list = final_result.server_list else: server_list = [] with open(args.file, "r") as f: From e1e572878981b5639f5e07abbece366a735cf46b Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Tue, 21 Jun 2016 11:42:50 +0200 Subject: [PATCH 39/40] ADD: Added a new method for compatibility with the original pythonwhois --- pythonwhois/__init__.py | 2 +- pythonwhois/net.py | 13 +++++++++++-- whois-oracle | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/pythonwhois/__init__.py b/pythonwhois/__init__.py index 00ee715..e9d096c 100644 --- a/pythonwhois/__init__.py +++ b/pythonwhois/__init__.py @@ -2,7 +2,7 @@ def get_whois(domain, normalized=[]): - final_result = net.get_whois_raw(domain, with_server_list=True) + final_result = net.get_whois_raw_wrapped(domain, with_server_list=True) # Unlisted handles will be looked up on the last WHOIS server that was queried. This may be changed to also query # other servers in the future, if it turns out that there are cases where the last WHOIS server in the chain doesn't # actually hold the handle contact details, but another WHOIS server in the chain does. 
diff --git a/pythonwhois/net.py b/pythonwhois/net.py index 65bf74e..3d30219 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -38,6 +38,15 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=False, with_server_list=False, server_list=None): + final_result = get_whois_raw_wrapped(domain, server, previous, rfc3490, never_cut, with_server_list, server_list) + if with_server_list: + return final_result.responses, final_result.server_list + else: + return final_result.responses + + +def get_whois_raw_wrapped(domain, server="", previous=None, rfc3490=True, never_cut=False, with_server_list=False, + server_list=None): previous = previous or [] server_list = server_list or [] @@ -95,8 +104,8 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals and "www." not in referal_server: # We want to ignore anything non-WHOIS (eg. HTTP) for now # Referal to another WHOIS server... - return get_whois_raw(domain, referal_server, new_list, server_list=server_list, - with_server_list=with_server_list) + return get_whois_raw_wrapped(domain, referal_server, new_list, server_list=server_list, + with_server_list=with_server_list) return build_return_value(with_server_list, new_list, server_list) diff --git a/whois-oracle b/whois-oracle index c0dcd88..10d172e 100755 --- a/whois-oracle +++ b/whois-oracle @@ -32,7 +32,7 @@ def json_fallback(obj): if args.file is None: - final_result = pythonwhois.net.get_whois_raw(args.domain[0], with_server_list=True) + final_result = pythonwhois.net.get_whois_raw_wrapped(args.domain[0], with_server_list=True) data = final_result.responses server_list = final_result.server_list else: From f23688878a679e179c517036bd86e4a5c8216a60 Mon Sep 17 00:00:00 2001 From: Sander <s.ten.hoor@outlook.com> Date: Thu, 23 Jun 2016 10:34:49 +0200 Subject: [PATCH 40/40] ENH: Small version increase --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 
ccdb824..595532d 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup setup(name='whois-oracle', - version='1.1.3', + version='1.1.4', description='Module for retrieving and parsing the WHOIS data for a domain. Supports most domains. No dependencies.', keywords='whois cool down', author='Sander ten Hoor, original by Sven Slootweg',