woodgern · varun-dhar · Jul 1, 2021 · Jul 1, 2021 · Jul 5, 2021 · Jul 5, 2021
diff --git a/confusables/__init__.py b/confusables/__init__.py
@@ -4,12 +4,11 @@
 from itertools import product
 
 from .config import CONFUSABLE_MAPPING_PATH, NON_NORMAL_ASCII_CHARS
-from .utils import is_ascii
 
 
 # read confusable mappings from file, build 2-way map of the pairs
 with open(os.path.join(os.path.dirname(__file__), CONFUSABLE_MAPPING_PATH), "r") as mappings:
-    CONFUSABLE_MAP = json.loads(mappings.readline())
+    CONFUSABLE_MAP = json.load(mappings)
 
 
 def is_confusable(str1, str2):
@@ -52,20 +51,20 @@ def confusable_regex(string, include_character_padding=False):
     return regex
 
 def normalize(string, prioritize_alpha=False):
-    normal_forms = set([""])
+    normal_forms = {""}
     for char in string:
         normalized_chars = []
         confusable_chars = confusable_characters(char)
-        if not is_ascii(char) or not char.isalpha():
+        if not (char.isascii() and char.isalnum()):
             for confusable in confusable_chars:
                 if prioritize_alpha:
-                    if ((char.isalpha() and confusable.isalpha() and is_ascii(confusable)) or (not char.isalpha() and is_ascii(confusable))) and confusable not in NON_NORMAL_ASCII_CHARS:
+                    if confusable.isascii() and (confusable.isalpha() or not char.isalpha()) and confusable not in NON_NORMAL_ASCII_CHARS:
                         normal = confusable
                         if len(confusable) > 1:
                             normal = normalize(confusable)[0]
                         normalized_chars.append(normal)
                 else:
-                    if is_ascii(confusable) and confusable not in NON_NORMAL_ASCII_CHARS:
+                    if confusable.isascii() and confusable not in NON_NORMAL_ASCII_CHARS:
                         normal = confusable
                         if len(confusable) > 1:
                             normal = normalize(confusable)[0]
@@ -75,5 +74,5 @@ def normalize(string, prioritize_alpha=False):
 
         if len(normalized_chars) == 0:
             normalized_chars = [char]
-        normal_forms = set([x[0]+x[1].lower() for x in list(product(normal_forms, normalized_chars))])
+        normal_forms = {prefix + suffix.lower() for prefix, suffix in product(normal_forms, normalized_chars)}
     return sorted(list(normal_forms))
diff --git a/confusables/parse.py b/confusables/parse.py
@@ -13,7 +13,7 @@ def _get_accented_characters(char):
 def _get_confusable_chars(character, unicode_confusable_map, depth):
     mapped_chars = unicode_confusable_map[character]
 
-    group = set([character])
+    group = {character}
     if depth <= MAX_SIMILARITY_DEPTH:
         for mapped_char in mapped_chars:
             group.update(_get_confusable_chars(mapped_char, unicode_confusable_map, depth + 1))
@@ -40,30 +40,28 @@ def parse_new_mapping_file():
                 if unicode_confusable_map.get(str1):
                     unicode_confusable_map[str1].add(str2)
                 else:
-                    unicode_confusable_map[str1] = set([str2])
+                    unicode_confusable_map[str1] = {str2}
 
                 if unicode_confusable_map.get(str2):
                     unicode_confusable_map[str2].add(str1)
                 else:
-                    unicode_confusable_map[str2] = set([str1])
+                    unicode_confusable_map[str2] = {str1}
 
                 if len(str1) == 1:
-                    case_change = str1.lower() if str1.isupper() else str1.upper()
-                    if case_change != str1:
-                        unicode_confusable_map[str1].add(case_change)
-                        if unicode_confusable_map.get(case_change) is not None:
-                            unicode_confusable_map[case_change].add(str1)
-                        else:
-                            unicode_confusable_map[case_change] = set([str1])
+                    case_change = str1.swapcase()
+                    # Caseless characters swapcase() to themselves; skip the self-mapping.
+                    if case_change != str1:
+                        unicode_confusable_map[str1].add(case_change)
+                        # Register the reverse direction, creating the set on first use.
+                        unicode_confusable_map.setdefault(case_change, set()).add(str1)
 
                 if len(str2) == 1:
-                    case_change = str2.lower() if str2.isupper() else str2.upper()
-                    if case_change != str2:
-                        unicode_confusable_map[str2].add(case_change)
-                        if unicode_confusable_map.get(case_change) is not None:
-                            unicode_confusable_map[case_change].add(str2)
-                        else:
-                            unicode_confusable_map[case_change] = set([str2])
+                    case_change = str2.swapcase()
+                    # Caseless characters swapcase() to themselves; skip the self-mapping.
+                    if case_change != str2:
+                        unicode_confusable_map[str2].add(case_change)
+                        # Register the reverse direction, creating the set on first use.
+                        unicode_confusable_map.setdefault(case_change, set()).add(str2)
 
     for char in string.ascii_lowercase:
         accented = _get_accented_characters(char)
@@ -72,7 +70,7 @@ def parse_new_mapping_file():
             if unicode_confusable_map.get(accent):
                 unicode_confusable_map[accent].add(char)
             else:
-                unicode_confusable_map[accent] = set([char])
+                unicode_confusable_map[accent] = {char}
 
     for char in string.ascii_uppercase:
         accented = _get_accented_characters(char)
@@ -81,17 +79,16 @@ def parse_new_mapping_file():
             if unicode_confusable_map.get(accent):
                 unicode_confusable_map[accent].add(char)
             else:
-                unicode_confusable_map[accent] = set([char])
+                unicode_confusable_map[accent] = {char}
 
     CONFUSABLE_MAP = {}
-    characters_to_map = list(unicode_confusable_map.keys())
-    for character in list(unicode_confusable_map.keys()):
+    for character in unicode_confusable_map.keys():
         char_group = _get_confusable_chars(character, unicode_confusable_map, 0)
 
         CONFUSABLE_MAP[character] = list(char_group)
 
     mapping_file = open(os.path.join(os.path.dirname(__file__), CONFUSABLE_MAPPING_PATH), "w")
-    mapping_file.write(json.dumps(CONFUSABLE_MAP))
+    json.dump(CONFUSABLE_MAP,mapping_file)
     mapping_file.close()
 
-parse_new_mapping_file()
+parse_new_mapping_file()
diff --git a/confusables/utils.py b/confusables/utils.py