Fix to get regex to work on Python > 3.6 #153

Open · wants to merge 1 commit into master
36 changes: 18 additions & 18 deletions pythonwhois/parse.py
@@ -2,7 +2,7 @@
import re, sys, datetime, csv, pkgutil
from . import net, shared

try:
from io import StringIO
except ImportError:
from cStringIO import StringIO
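
This first hunk appears to be whitespace-only; the try/except itself is the usual Python 2/3 compatibility shim, preferring io.StringIO and falling back to cStringIO when that import fails. A minimal sketch of the pattern (the buffer usage below is generic illustration, not code from this module):

try:
    from io import StringIO          # Python 3
except ImportError:
    from cStringIO import StringIO   # Python 2 fallback

buf = StringIO()
buf.write(u"whois response text")
print(buf.getvalue())  # whois response text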
@@ -25,13 +25,13 @@ def read_dataset(filename, destination, abbrev_key, name_key, is_dict=False):
destination[line[abbrev_key]] = line[name_key]
except IOError as e:
pass

airports = {}
countries = {}
states_au = {}
states_us = {}
states_ca = {}

try:
reader = csv.reader(pkgdata("airports.dat").splitlines())

@@ -50,7 +50,7 @@ def read_dataset(filename, destination, abbrev_key, name_key, is_dict=False):

def precompile_regexes(source, flags=0):
return [re.compile(regex, flags) for regex in source]

grammar = {
"_data": {
'id': ['Domain ID:[ ]*(?P<val>.+)'],
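
precompile_regexes is the helper that turns each list of pattern strings in the grammar table into compiled pattern objects the parser can reuse. A minimal sketch of that idea, with a made-up two-entry list standing in for the real grammar:

import re

def precompile_regexes(source, flags=0):
    # Compile every pattern string in the list once, up front.
    return [re.compile(regex, flags) for regex in source]

# Hypothetical samples in the same style as the grammar entries above.
sample = ["Domain ID:[ ]*(?P<val>.+)", "Domain Name:[ ]*(?P<val>.+)"]
compiled = precompile_regexes(sample, re.IGNORECASE)

match = compiled[1].search("Domain Name: example.com")
if match:
    print(match.group("val"))  # example.com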
@@ -201,8 +201,8 @@ def precompile_regexes(source, flags=0):
}

def preprocess_regex(regex):
-# Fix for #2; prevents a ridiculous amount of varying size permutations.
-regex = re.sub(r"\\s\*\(\?P<([^>]+)>\.\+\)", r"\s*(?P<\1>\S.*)", regex)
+# Fix for #2; prevents a ridiculous amount of varying size permutations
+regex = re.sub(r"\\s\*\(\?P<([^>]+)>\.\+\)", r"\\s*(?P<\1>\\S.*)", regex)
# Experimental fix for #18; removes unnecessary variable-size whitespace
# matching, since we're stripping results anyway.
regex = re.sub(r"\[ \]\*\(\?P<([^>]+)>\.\*\)", r"(?P<\1>.*)", regex)
@@ -553,7 +553,7 @@ def parse_raw_whois(raw_data, normalized=None, never_query_handles=True, handle_
data["nameservers"].append(match.strip())
except KeyError as e:
data["nameservers"] = [match.strip()]
data["contacts"] = parse_registrants(raw_data, never_query_handles, handle_server)

@@ -645,7 +645,7 @@ def normalize_data(data, normalized):
for country, source in (("united states", states_us), ("australia", states_au), ("canada", states_ca)):
if country in contact["country"].lower() and contact["state"] in source:
contact["state"] = source[contact["state"]]

for key in ("email",):
if key in contact and contact[key] is not None and (normalized == True or key in normalized):
if is_string(contact[key]):
@@ -660,7 +660,7 @@ def normalize_data(data, normalized):
for key in ("city", "organization", "state", "country"):
if key in contact and contact[key] is not None and (normalized == True or key in normalized):
contact[key] = normalize_name(contact[key], abbreviation_threshold=3, length_threshold=3)

if "name" in contact and "organization" not in contact:
lines = [x.strip() for x in contact["name"].splitlines()]
new_lines = []
@@ -674,10 +674,10 @@ def normalize_data(data, normalized):
contact["name"] = "\n".join(lines)
else:
del contact["name"]

if len(new_lines) > 0:
contact["organization"] = "\n".join(new_lines)

if "street" in contact and "organization" not in contact:
lines = [x.strip() for x in contact["street"].splitlines()]
if len(lines) > 1:
@@ -686,7 +686,7 @@ def normalize_data(data, normalized):
contact["organization"] = lines[0]
contact["street"] = "\n".join(lines[1:])
break

for key in list(contact.keys()):
try:
contact[key] = contact[key].strip(", ")
@@ -831,10 +831,10 @@ def remove_suffixes(data):
# Removes everything before and after the first non-whitespace continuous string.
# Used to get rid of IP suffixes for nameservers.
cleaned_list = []

for entry in data:
cleaned_list.append(re.search("([^\s]+)\s*[\s]*", entry).group(1).lstrip())

return cleaned_list

def parse_registrants(data, never_query_handles=True, handle_server=""):
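
remove_suffixes keeps only the first continuous non-whitespace run of each entry, which drops the IP address some WHOIS servers print after a nameserver. A self-contained sketch with invented sample data:

import re

def remove_suffixes(data):
    # Keep the first continuous non-whitespace string of each entry.
    cleaned_list = []
    for entry in data:
        cleaned_list.append(re.search(r"([^\s]+)\s*[\s]*", entry).group(1).lstrip())
    return cleaned_list

print(remove_suffixes(["ns1.example.com 192.0.2.1", "ns2.example.com"]))
# ['ns1.example.com', 'ns2.example.com']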
@@ -911,7 +911,7 @@ def parse_registrants(data, never_query_handles=True, handle_server=""):
elif category == "admin":
admin_contact = data_reference
break

# Post-processing
for obj in (registrant, tech_contact, billing_contact, admin_contact):
if obj is not None:
@@ -986,18 +986,18 @@ def fetch_nic_contact(handle, lookup_server):
response = net.get_whois_raw(handle, lookup_server)
response = [segment.replace("\r", "") for segment in response] # Carriage returns are the devil
results = parse_nic_contact(response)

if len(results) > 0:
return results[0]
else:
raise shared.WhoisException("No contact data found in the response.")

def parse_nic_contact(data):
handle_contacts = []
for regex in nic_contact_regexes:
for segment in data:
matches = re.finditer(regex, segment)
for match in matches:
handle_contacts.append(match.groupdict())

return handle_contacts
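
For reference, parse_nic_contact simply runs every precompiled contact regex over every response segment and collects each match's named groups into a dict. A minimal sketch; the regex and segment below are invented, since the real nic_contact_regexes list is outside this diff:

import re

# Hypothetical stand-in for the module's precompiled regex list.
nic_contact_regexes = [
    re.compile(r"person:\s*(?P<name>.+)\nphone:\s*(?P<phone>.+)"),
]

def parse_nic_contact(data):
    handle_contacts = []
    for regex in nic_contact_regexes:
        for segment in data:
            for match in re.finditer(regex, segment):
                handle_contacts.append(match.groupdict())
    return handle_contacts

print(parse_nic_contact(["person: Jane Doe\nphone: +1 555 0100"]))
# [{'name': 'Jane Doe', 'phone': '+1 555 0100'}]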