From 5574f2afda7b8b464172e31e9999001ca90f497f Mon Sep 17 00:00:00 2001 From: ckxz105 Date: Thu, 16 Apr 2020 13:46:11 -0700 Subject: [PATCH 001/278] add cache support with redis for embedding, ensure TSNE generate same results on same input --- kgtk/cli/text_embedding.py | 302 +++++++++++++++--------- kgtk/cli/text_embedding_README.md | 13 + kgtk/cli/text_embedding_requirement.txt | 3 +- 3 files changed, 203 insertions(+), 115 deletions(-) diff --git a/kgtk/cli/text_embedding.py b/kgtk/cli/text_embedding.py index 5e81a7422..2118da76d 100644 --- a/kgtk/cli/text_embedding.py +++ b/kgtk/cli/text_embedding.py @@ -22,10 +22,11 @@ class EmbeddingVector: - def __init__(self, model_name=None, query_server=None): + def __init__(self, model_name=None, query_server=None, cache_config:dict={}): from sentence_transformers import SentenceTransformer, SentencesDataset, LoggingHandler, losses, models # type: ignore import logging import re + self._logger = logging.getLogger(__name__) from collections import defaultdict if model_name is None: model_name = 'bert-base-nli-mean-tokens' @@ -41,11 +42,25 @@ def __init__(self, model_name=None, query_server=None): else: self.model_name = model_name self.model = SentenceTransformer(model_name) - if query_server is None: - self.wikidata_server = "https://dsbox02.isi.edu:8888/bigdata/namespace/wdq/sparql" + if query_server is None or query_server == "": + self.wikidata_server = "https://query.wikidata.org/sparql" else: self.wikidata_server = query_server - self.q_nodes_descriptions = dict() + use_cache = cache_config.get("use_cache", False) + if use_cache: + import redis + host = cache_config.get("host", "dsbox01.isi.edu") + port = cache_config.get("port", 6379) + self.redis_server = redis.Redis(host=host, port=port, db=0) + try: + _ = self.redis_server.get("foo") + self._logger.debug("Cache server {}:{} connected!".format(host, port)) + except: + self._logger.error("Cache server {}:{} is not able to be connected! 
Will not use cache!".format(host, port)) + self.redis_server = None + else: + self.redis_server = None + self.qnodes_descriptions = dict() self.vectors_map = dict() self.vectors_2D = None self.gt_nodes = set() @@ -56,7 +71,6 @@ def __init__(self, model_name=None, query_server=None): self.metadata = [] self.gt_indexes = set() self.input_format = "" - self._logger = logging.getLogger(__name__) self.token_patern = re.compile(r"(?u)\b\w\w+\b") @staticmethod @@ -78,14 +92,27 @@ def minDistance(word1, word2): return table[-1][-1] - def get_sentences_embedding(self, sentences: typing.List[str]): + def get_sentences_embedding(self, sentences: typing.List[str], qnodes: typing.List[str]): """ transform a list of sentences to embedding vectors """ # if sentences in self.embedding_cache: # return self.embedding_cache[sentences] # else: - sentence_embeddings = self.model.encode(sentences, show_progress_bar=False) + from ast import literal_eval + if self.redis_server is not None: + sentence_embeddings = [] + for each_node, each_sentence in zip(qnodes, sentences): + cache_res = self.redis_server.get(each_node+each_sentence) + if cache_res is not None: + sentence_embeddings.append(literal_eval(cache_res.decode("utf-8"))) + # self._logger.error("{} hit!".format(each_node+each_sentence)) + else: + each_embedding = self.model.encode([each_sentence], show_progress_bar=False) + sentence_embeddings.extend(each_embedding) + self.redis_server.set(each_node+each_sentence, str(each_embedding[0].tolist())) + else: + sentence_embeddings = self.model.encode(sentences, show_progress_bar=False) # self.embedding_cache[sentences] = sentence_embeddings return sentence_embeddings @@ -105,7 +132,7 @@ def send_sparql_query(self, query_body:str): results = qm.query().convert()['results']['bindings'] return results except: - raise ValueError("Sending Sparl query to {} failed!".format(wikidata_server)) + raise ValueError("Sending Sparl query to {} failed!".format(self.wikidata_server)) def get_item_description(self, qnodes: typing.List[str]=None, target_properties:dict={}, gt_label:str=""): """ @@ -117,7 +144,7 @@ def get_item_description(self, qnodes: typing.List[str]=None, target_properties: find_all_properties = True else: find_all_properties = False - + # self._logger.error(str(qnodes)) properties_list = [[] for _ in range(4)] used_p_node_ids = set() names = ["labels", "descriptions", "isa_properties", "has_properties"] @@ -131,97 +158,117 @@ def get_item_description(self, qnodes: typing.List[str]=None, target_properties: elif v == "has_properties": properties_list[3].append(k) - need_find_label = "label" in properties_list[0] - need_find_description = "description" in properties_list[1] - query_qnodes = "" - for each in qnodes: - query_qnodes += "wd:{} ".format(each) - - # this is used to get corresponding labels / descriptions - if need_find_label or need_find_description: - query_body = """ - select ?item ?itemDescription ?itemLabel - where { - values ?item {""" + query_qnodes + """ } - SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". 
} - } - """ - results = self.send_sparql_query(query_body) - for each in results: - each_node = each['item']['value'].split("/")[-1] - if 'itemDescription' in each: - description = each['itemDescription']['value'] - else: - description = "" - if "itemLabel" in each: - label = each['itemLabel']['value'] - # if each_node == self.gt[gt_label]: - # if self.minDistance(label, gt_label) > len(gt_label): - # a = "".join(self.token_patern.findall(label.lower())) - # b = "".join(self.token_patern.findall(gt_label.lower())) - # if a not in b and b not in a: - # self._logger.error("{} with {} --> {} edit distance too larger!!!".format(each_node, label, gt_label)) - else: - label = "" - if need_find_label: - self.candidates[each_node]["label_properties"] = [label] - if need_find_description: - self.candidates[each_node]["description_properties"] = [description] - - # this is used to get corresponding P node labels - query_body2 = "select ?item" - part2 = "" - for name, part in zip(names, properties_list): - for i, each in enumerate(part): - if each not in {"label", "description", "all"}: - used_p_node_ids.add(each) - query_body2 += " ?{}_{}Label".format(name, i) - part2 += """?item wdt:{} ?{}_{}. \n""".format(each, name, i) - query_body2 += """ + sentences_cache_dict = {} + if self.redis_server is not None: + for each_node in qnodes: + cache_res = self.redis_server.get(each_node+str(properties_list)) + if cache_res is not None: + sentences_cache_dict[each_node] = cache_res + # self._logger.error("{} hit!".format(each_node+str(properties_list))) + + if len(sentences_cache_dict) > 0: + qnodes = set(qnodes) - set(sentences_cache_dict.keys()) + + # only need to do query when we still have remained nodes + if len(qnodes) > 0: + need_find_label = "label" in properties_list[0] + need_find_description = "description" in properties_list[1] + query_qnodes = "" + for each in qnodes: + query_qnodes += "wd:{} ".format(each) + + # this is used to get corresponding labels / descriptions + if need_find_label or need_find_description: + query_body = """ + select ?item ?itemDescription ?itemLabel where { - values ?item {""" + query_qnodes + "}" - - query_body2 += part2 + """ + values ?item {""" + query_qnodes + """ } SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". 
} } - """ - results2 = self.send_sparql_query(query_body2) - for each in results2: - node_name = each['item']['value'].split("/")[-1] + """ + results = self.send_sparql_query(query_body) + for each in results: + each_node = each['item']['value'].split("/")[-1] + if 'itemDescription' in each: + description = each['itemDescription']['value'] + else: + description = "" + if "itemLabel" in each: + label = each['itemLabel']['value'] + # if each_node == self.gt[gt_label]: + # if self.minDistance(label, gt_label) > len(gt_label): + # a = "".join(self.token_patern.findall(label.lower())) + # b = "".join(self.token_patern.findall(gt_label.lower())) + # if a not in b and b not in a: + # self._logger.error("{} with {} --> {} edit distance too larger!!!".format(each_node, label, gt_label)) + else: + label = "" + if need_find_label: + self.candidates[each_node]["label_properties"] = [label] + if need_find_description: + self.candidates[each_node]["description_properties"] = [description] + + # this is used to get corresponding P node labels + query_body2 = "select ?item" + part2 = "" for name, part in zip(names, properties_list): - if len(part) > 0: - properties_res = set() - for i in range(len(part)): - property_key = '{}_{}Label'.format(name, i) - if property_key in each: - properties_res.add(each[property_key]['value']) - self.candidates[node_name][name] = properties_res - - # if need get all properties, we need to run extra query - if find_all_properties: - query_body3 = """ - select DISTINCT ?item ?p_entity ?p_entityLabel - where { - values ?item {"""+ query_qnodes + """} - ?item ?p ?o. - FILTER regex(str(?p), "^http://www.wikidata.org/prop/P", "i") - BIND (IRI(REPLACE(STR(?p), "http://www.wikidata.org/prop", "http://www.wikidata.org/entity")) AS ?p_entity) . - SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } - } + for i, each in enumerate(part): + if each not in {"label", "description", "all"}: + used_p_node_ids.add(each) + query_body2 += " ?{}_{}Label".format(name, i) + part2 += """?item wdt:{} ?{}_{}. \n""".format(each, name, i) + query_body2 += """ + where { + values ?item {""" + query_qnodes + "}" + + query_body2 += part2 + """ + SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } + } """ - results3 = self.send_sparql_query(query_body3) - for each in results3: + results2 = self.send_sparql_query(query_body2) + for each in results2: node_name = each['item']['value'].split("/")[-1] - p_node_id = each['p_entity']['value'].split("/")[-1] - p_node_label = each['p_entityLabel']['value'] - if p_node_id not in used_p_node_ids: - if "has_properties" in self.candidates[node_name]: - self.candidates[node_name]["has_properties"].add(p_node_label) - else: - self.candidates[node_name]["has_properties"] = set([p_node_label]) + for name, part in zip(names, properties_list): + if len(part) > 0: + properties_res = set() + for i in range(len(part)): + property_key = '{}_{}Label'.format(name, i) + if property_key in each: + properties_res.add(each[property_key]['value']) + self.candidates[node_name][name] = properties_res + + # if need get all properties, we need to run extra query + if find_all_properties: + query_body3 = """ + select DISTINCT ?item ?p_entity ?p_entityLabel + where { + values ?item {"""+ query_qnodes + """} + ?item ?p ?o. + FILTER regex(str(?p), "^http://www.wikidata.org/prop/P", "i") + BIND (IRI(REPLACE(STR(?p), "http://www.wikidata.org/prop", "http://www.wikidata.org/entity")) AS ?p_entity) . 
+                    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
+                }
+                """
+                results3 = self.send_sparql_query(query_body3)
+                for each in results3:
+                    node_name = each['item']['value'].split("/")[-1]
+                    p_node_id = each['p_entity']['value'].split("/")[-1]
+                    p_node_label = each['p_entityLabel']['value']
+                    if p_node_id not in used_p_node_ids:
+                        if "has_properties" in self.candidates[node_name]:
+                            self.candidates[node_name]["has_properties"].add(p_node_label)
+                        else:
+                            self.candidates[node_name]["has_properties"] = set([p_node_label])
 
         for each_node_id in qnodes:
-            self.candidates[each_node_id]["sentence"] = self.attribute_to_sentence(self.candidates[each_node_id], each_node_id)
+            each_sentence = self.attribute_to_sentence(self.candidates[each_node_id], each_node_id)
+            self.candidates[each_node_id]["sentence"] = each_sentence
+            if self.redis_server is not None:
+                # self._logger.error("Pushed: {}".format(each_node_id+str(properties_list)))
+                self.redis_server.set(each_node_id+str(properties_list), each_sentence)
+
+        for each_node_id, sentence in sentences_cache_dict.items():
+            self.candidates[each_node_id]["sentence"] = sentence
 
     def read_input(self, file_path: str, skip_nodes_set: set=None,
@@ -234,11 +281,12 @@ def read_input(self, file_path: str, skip_nodes_set: set=None,
         from collections import defaultdict
         import pandas as pd # type: ignore
         import numpy as np
+        import math
 
         self.property_labels_dict = property_labels_dict
 
         if input_format == "test_format":
             self.input_format = input_format
-            input_df = pd.read_csv(file_path, dtype=str)
+            input_df = pd.read_csv(file_path)
             candidates = {}
             gt = {}
             count = 0
             if "GT_kg_id" in input_df.columns:
                 gt_column_id = "GT_kg_id"
             elif "kg_id" in input_df.columns:
                 gt_column_id = "kg_id"
             else:
-                raise ValueError("Can't find ground truth id column!")
+                raise ValueError("Can't find ground truth id column! It should be named either `GT_kg_id` or `kg_id`")
 
         for _, each in input_df.iterrows():
-            temp = str(each['candidates']).split("|")
+            if each['candidates'] is np.nan or math.isnan(each['candidates']):
+                temp = []
+            else:
+                temp = str(each['candidates']).split("|")
             to_remove_q = set()
             if each[gt_column_id] is np.nan:
-                self._logger.error("Ignore nan value form {}".format(str(each)))
+                self._logger.warning("Ignore NaN gt value from {}".format(str(each)))
                 each[gt_column_id] = ""
             gt_nodes = each[gt_column_id].split(" ")
             label = str(each["label"])
@@ -273,7 +324,7 @@ def read_input(self, file_path: str, skip_nodes_set: set=None,
                 count += len(temp)
                 self.gt_nodes.add(each[gt_column_id])
                 self.get_item_description(temp, target_properties, label)
-        
+
             self._logger.info("Totally {} rows with {} candidates loaded.".format(str(len(gt)), str(count)))
 
         elif input_format == "kgtk_format":
@@ -356,6 +407,8 @@ def get_real_label_name(self, node):
 
     def attribute_to_sentence(self, v, node_id = None):
         concated_sentence = ""
+        # sort the properties to ensure the sentence is always the same
+        v = {key: sorted(list(value)) for key, value in v.items() if len(value) > 0}
         if "label_properties" in v and len(v["label_properties"]) > 0:
             concated_sentence += self.get_real_label_name(v["label_properties"][0])
         if "description_properties" in v and len(v["description_properties"]) > 0:
@@ -397,11 +450,12 @@ def get_vetors(self, use_cache=True, vector_dump_file=None):
         jobs_count = 0
         counter = 0
         self._logger.info("Now generating embedding vector.")
-
         for q_node, each_item in tqdm(self.candidates.items()):
             # do process for each row(one target)
             sentence = each_item["sentence"]
+            if isinstance(sentence, bytes):
+                sentence = sentence.decode("utf-8")
-            vectors = self.get_sentences_embedding([sentence])
+            vectors = self.get_sentences_embedding([sentence], [q_node])
             self.vectors_map[q_node] = vectors[0]
 
         self._logger.info("Totally used {} seconds.".format(str(time.time() - start_all)))
@@ -492,11 +546,12 @@ def plot_result(self, use_cache=True, vector_dump_file=None,
         #     self.load_vectors(vector_dump_file, "2D")
         # else:
         vectors = list(self.vectors_map.values())
+        self.vectors_map = {k: v for k, v in sorted(self.vectors_map.items(), key=lambda item: item[0], reverse=True)}
        # use tsne to reduce dimension
         if run_TSNE:
             self._logger.warning("Start running TSNE to reduce dimension. 
It will take a long time.") start = time.time() - self.vectors_2D = TSNE(n_components=2).fit_transform(vectors) + self.vectors_2D = TSNE(n_components=2, random_state=0).fit_transform(vectors) # self.dump_vectors(vector_dump_file, "2D") self._logger.info("Totally used {} seconds.".format(time.time() - start)) @@ -506,10 +561,7 @@ def plot_result(self, use_cache=True, vector_dump_file=None, vector_map_keys = list(self.vectors_map.keys()) for each_node in self.gt_nodes: gt_indexes.add(vector_map_keys.index(each_node)) - # load the descriptions if we don't have them - # if len(self.q_nodes_descriptions) == 0: - # for each in self.candidates.values(): - # _ = self.get_item_description(each) + self.metadata.append("Q_nodes\tType\tLabel\tDescription") for i, each in enumerate(self.vectors_map.keys()): label = self.q_node_to_label[each] @@ -551,6 +603,7 @@ def evaluate_result(self): """ for the ground truth nodes, evaluate the average distance to the centroid, the lower the average distance, the better clustering results should be """ + import numpy as np centroid = None gt_nodes_vectors = [] if len(self.gt_indexes) == 0: @@ -559,19 +612,19 @@ def evaluate_result(self): points = self.gt_indexes for i, each in enumerate(self.vectors_map.keys()): # label = self.q_node_to_label[each] - # description = self.q_nodes_descriptions.get(each, "") + # description = self.qnodes_descriptions.get(each, "") if i in points: if centroid is None: - centroid = self.vectors_map[each] + centroid = np.array(self.vectors_map[each]) else: - centroid += self.vectors_map[each] + centroid += np.array(self.vectors_map[each]) gt_nodes_vectors.append(self.vectors_map[each]) centroid = centroid / len(points) distance_sum = 0 for each in gt_nodes_vectors: distance_sum += self.calculate_distance(each, centroid) - self._logger.warning("The average distance for the ground truth nodes to centroid is {}".format(distance_sum / len(points))) + self._logger.info("The average distance for the ground truth nodes to centroid is {}".format(distance_sum / len(points))) @staticmethod def calculate_distance(a, b): @@ -634,7 +687,7 @@ def load_black_list_files(file_path): import gzip import re token_patern = re.compile(r"(?u)\b\w\w+\b") - q_nodes_set = set() + qnodes_set = set() for each_file in file_path: try: # tar.gz file @@ -663,13 +716,13 @@ def load_black_list_files(file_path): each = each.replace("\n", "") for each_part in token_patern.findall(each): if each_part[0] == "Q" and each_part[1:].isnumeric(): - q_nodes_set.add(each_part) + qnodes_set.add(each_part) except Exception as e: _logger.error("Load black list file {} failed!".format(each_file)) _logger.debug(e, exc_info=True) - _logger.info("Totally {} black list nodes loadded.".format(len(q_nodes_set))) - return q_nodes_set + _logger.info("Totally {} black list nodes loadded.".format(len(qnodes_set))) + return qnodes_set def main(**kwargs): @@ -724,6 +777,7 @@ def main(**kwargs): input_uris = kwargs.get("input_uris", []) output_format = kwargs.get("output_format", "kgtk_format") property_labels_files = kwargs.get("property_labels_file_uri", "") + query_server = kwargs.get("query_server") properties = dict() all_property_relate_inputs = [kwargs.get("label_properties", ["label"]), kwargs.get("description_properties", ["description"]), @@ -732,7 +786,10 @@ def main(**kwargs): ] all_required_properties = ["label_properties", "description_properties", "isa_properties", "has_properties"] - + cache_config = {"use_cache": kwargs.get("use_cache", False), + "host": kwargs.get("cache_host", 
"dsbox01.isi.edu"), + "port": kwargs.get("cache_port", 6379) + } for each_property, each_input in zip(all_required_properties, all_property_relate_inputs): for each in each_input: properties[each] = each_property @@ -769,7 +826,7 @@ def main(**kwargs): for each_model_name in all_models_names: for each_input_file in input_uris: _logger.info("Running {} model on {}".format(each_model_name, each_input_file)) - process = EmbeddingVector(each_model_name) + process = EmbeddingVector(each_model_name, query_server=query_server, cache_config=cache_config) process.read_input(file_path=each_input_file, skip_nodes_set=black_list_set, input_format=input_format, target_properties=properties, property_labels_dict=property_labels_dict) @@ -849,6 +906,23 @@ def str2bool(v): parser.add_argument("--run-TSNE", type=str2bool, nargs='?', action='store', default=True, dest="run_TSNE", help="whether to run TSNE or not after the embedding, default is true.") + # cache config + parser.add_argument("--use-cache", type=str2bool, nargs='?', action='store', + default=False, dest="use_cache", + help="whether to use cache to get some embedding vectors quicker, default is False") + parser.add_argument("--cache-host", nargs='?', action='store', + default="dsbox01.isi.edu", dest="cache_host", + help="cache host address, default is `dsbox01.isi.edu`" + ) + parser.add_argument("--cache-port", nargs='?', action='store', + default="6379", dest="cache_port", + help="cache server port, default is `6379`" + ) + # query server + parser.add_argument("--query-server", nargs='?', action='store', + default="", dest="query_server", + help="cache host address, default is https://query.wikidata.org/sparql" + ) def run(**kwargs): diff --git a/kgtk/cli/text_embedding_README.md b/kgtk/cli/text_embedding_README.md index 36e834268..d9e694a17 100644 --- a/kgtk/cli/text_embedding_README.md +++ b/kgtk/cli/text_embedding_README.md @@ -151,6 +151,19 @@ Third column is the embeded vecotrs. This will have embedded vectors values after running TSNE and reduced dimension to 2-dimensions for each Q nodes. This is used for visulization. (for example, you can view it at Google's online tools here: http://projector.tensorflow.org/) 3. Metadata for the generated vectors: This will contains the metadata information for the Q nodes generated from 2 files mentioned above. It will contains the Q node value of each vector, the type (it is a `candidate` or a `ground truth` node), the given label of the Q node and corresponding fetched description information from wikidata. +#### Query / cache related +##### --query-server +You can change the query wikidata server address when the input format is `test_format`. The default is to use wikidata official query server, but it has limit on query time and frequency. Alternatively, you can choose to use dsbox02's one as `https://dsbox02.isi.edu:8888/bigdata/namespace/wdq/sparql` (vpn needed). + +##### --use-cache +If set to be true, the system will try to get the cached results for embedding computations. The default value is False, not to use cache. Basically the cache service is a Redis server. + +##### --cache-host +The host address for the Redis cache service. Default is `dsbox01.isi.edu` + +##### --cache-port +The host port for the Redis cache service. Default is `6379` + #### Usage of vector projector You can apply any of the tsv vector files along with the metadata file to display it on google's tools for further experiment. Step 1: Click the `Load` button on the left side of the web. 
diff --git a/kgtk/cli/text_embedding_requirement.txt b/kgtk/cli/text_embedding_requirement.txt
index 4247dfe7e..5783b492f 100644
--- a/kgtk/cli/text_embedding_requirement.txt
+++ b/kgtk/cli/text_embedding_requirement.txt
@@ -2,4 +2,5 @@ sentence-transformers
 sklearn
 matplotlib
 SPARQLWrapper
-torch
\ No newline at end of file
+torch
+redis
\ No newline at end of file

From 9d36b1b028575eaecd1736cddff5b4eeb32cd851 Mon Sep 17 00:00:00 2001
From: ckxz105
Date: Thu, 16 Apr 2020 23:46:56 -0700
Subject: [PATCH 002/278] bug fix on sentence embedding

---
 kgtk/cli/text_embedding.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kgtk/cli/text_embedding.py b/kgtk/cli/text_embedding.py
index 2118da76d..94a0c3949 100644
--- a/kgtk/cli/text_embedding.py
+++ b/kgtk/cli/text_embedding.py
@@ -298,10 +298,11 @@ def read_input(self, file_path: str, skip_nodes_set: set=None,
                 raise ValueError("Can't find ground truth id column! It should be named either `GT_kg_id` or `kg_id`")
 
         for _, each in input_df.iterrows():
-            if each['candidates'] is np.nan or math.isnan(each['candidates']):
-                temp = []
-            else:
+            if isinstance(each["candidates"], str):
                 temp = str(each['candidates']).split("|")
+            elif each['candidates'] is np.nan or math.isnan(each['candidates']):
+                temp = []
+
             to_remove_q = set()
             if each[gt_column_id] is np.nan:
                 self._logger.warning("Ignore NaN gt value from {}".format(str(each)))

From 0ae780f900753246f2bb5852f2e6fb43fd7ea20a Mon Sep 17 00:00:00 2001
From: ckxz105
Date: Tue, 21 Apr 2020 15:03:15 -0700
Subject: [PATCH 003/278] bug fix on text embedding

---
 kgtk/cli/text_embedding.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kgtk/cli/text_embedding.py b/kgtk/cli/text_embedding.py
index 94a0c3949..f8c650a9b 100644
--- a/kgtk/cli/text_embedding.py
+++ b/kgtk/cli/text_embedding.py
@@ -546,8 +546,8 @@ def plot_result(self, use_cache=True, vector_dump_file=None,
         #     self._logger.info("Using cached 2D vector file!")
         #     self.load_vectors(vector_dump_file, "2D")
         # else:
-        vectors = list(self.vectors_map.values())
         self.vectors_map = {k: v for k, v in sorted(self.vectors_map.items(), key=lambda item: item[0], reverse=True)}
+        vectors = list(self.vectors_map.values())
         # use tsne to reduce dimension
         if run_TSNE:
             self._logger.warning("Start running TSNE to reduce dimension. 
It will take a long time.") From 8b35fb325e1bc1ea3c1ad87aba5a792486e810c1 Mon Sep 17 00:00:00 2001 From: ckxz105 Date: Tue, 21 Apr 2020 18:03:19 -0700 Subject: [PATCH 004/278] add model name for embedding cache key --- kgtk/cli/text_embedding.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kgtk/cli/text_embedding.py b/kgtk/cli/text_embedding.py index f8c650a9b..05404a54d 100644 --- a/kgtk/cli/text_embedding.py +++ b/kgtk/cli/text_embedding.py @@ -29,7 +29,7 @@ def __init__(self, model_name=None, query_server=None, cache_config:dict={}): self._logger = logging.getLogger(__name__) from collections import defaultdict if model_name is None: - model_name = 'bert-base-nli-mean-tokens' + self.model_name = 'bert-base-nli-mean-tokens' # xlnet need to be trained before using, we can't use this for now # elif model_name == "xlnet-base-cased": # word_embedding_model = models.XLNet('xlnet-base-cased') @@ -41,7 +41,7 @@ def __init__(self, model_name=None, query_server=None, cache_config:dict={}): # self.model = SentenceTransformer(modules=[word_embedding_model, pooling_model]) else: self.model_name = model_name - self.model = SentenceTransformer(model_name) + self.model = SentenceTransformer(model_name) if query_server is None or query_server == "": self.wikidata_server = "https://query.wikidata.org/sparql" else: @@ -103,14 +103,17 @@ def get_sentences_embedding(self, sentences: typing.List[str], qnodes: typing.Li if self.redis_server is not None: sentence_embeddings = [] for each_node, each_sentence in zip(qnodes, sentences): - cache_res = self.redis_server.get(each_node+each_sentence) + query_cache_key = each_node + each_sentence + if self.model_name != "bert-base-wikipedia-sections-mean-tokens": + query_cache_key += self.model_name + cache_res = self.redis_server.get(query_cache_key) if cache_res is not None: sentence_embeddings.append(literal_eval(cache_res.decode("utf-8"))) # self._logger.error("{} hit!".format(each_node+each_sentence)) else: each_embedding = self.model.encode([each_sentence], show_progress_bar=False) sentence_embeddings.extend(each_embedding) - self.redis_server.set(each_node+each_sentence, str(each_embedding[0].tolist())) + self.redis_server.set(query_cache_key, str(each_embedding[0].tolist())) else: sentence_embeddings = self.model.encode(sentences, show_progress_bar=False) # self.embedding_cache[sentences] = sentence_embeddings From 79c53d383322a0f4eef257395fc505e7ff2254ab Mon Sep 17 00:00:00 2001 From: Divij Bhatia Date: Wed, 22 Apr 2020 23:00:04 -0700 Subject: [PATCH 005/278] added export_neo4j command --- README.md | 1 + kgtk/cli/export_neo4j.py | 257 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 258 insertions(+) create mode 100644 kgtk/cli/export_neo4j.py diff --git a/README.md b/README.md index a8a4dcb58..0f0d09dcc 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,7 @@ https://www.mankier.com/1/mlr * `sort` * `merge_identical_nodes` * `zconcat` +* `export_neo4j` To get an information on how to use each of them, run: `kgtk [TOOL] -h` diff --git a/kgtk/cli/export_neo4j.py b/kgtk/cli/export_neo4j.py new file mode 100644 index 000000000..2b2d6826f --- /dev/null +++ b/kgtk/cli/export_neo4j.py @@ -0,0 +1,257 @@ +import csv +import re +from pathlib import Path + + +class Node: + def __init__(self): + self.properties = None + self.instance_type = [] + + def add_property(self, property_name: str, property_value: str) -> None: + if not self.properties: + self.properties = Properties() + self.properties.add_property(property_name, 
property_value) + + def add_instance_type(self, instance_type): + self.instance_type.append(instance_type) + + def serialize(self, node_id): + if self.properties: + serialized_properties = self.properties.serialize_node_properties() + else: + serialized_properties = "" + serialized_instance_type = ':'.join(self.instance_type) + serialized_node = "CREATE ({}:{} {})".format(node_id, serialized_instance_type, serialized_properties) + return serialized_node + + +class NodeStore: + def __init__(self): + self.directory = dict() + + def get_or_create(self, node_id: str) -> Node: + if node_id not in self.directory: + self.directory[node_id] = Node() + return self.directory[node_id] + + def serialize(self): + for node_id, node in self.directory.items(): + yield node.serialize(node_id) + + +class Relationship: + def __init__(self): + self.name = None + self.src_node = None + self.dst_node = None + self.properties = None + + def add_names(self, name, src_node_id, dst_node_id): + self.name = name + self.src_node = src_node_id + self.dst_node = dst_node_id + + def add_property(self, property_name, property_value): + if not self.properties: + self.properties = Properties() + self.properties.add_property(property_name, property_value) + + def serialize(self): + if self.properties: + serialized_properties = self.properties.serialize_qualifier_properties() + else: + serialized_properties = "" + serialized_relationship = "({})-[:{} {}]->({}),".format(self.src_node, self.name, serialized_properties, self.dst_node) + return serialized_relationship + + +class RelationshipStore: + def __init__(self): + self.directory = dict() + + def get(self, relationship_id) -> Relationship: + return self.directory.get(relationship_id, None) + + def get_or_create(self, relationship_id: str) -> Relationship: + if relationship_id not in self.directory: + self.directory[relationship_id] = Relationship() + return self.directory[relationship_id] + + def serialize(self): + yield "CREATE" + total_relationships = len(self.directory) + i = 0 + for relationship_id, relationship in self.directory.items(): + if i >= total_relationships - 1: + yield relationship.serialize().rstrip(",") + else: + yield relationship.serialize() + i += 1 + + +class Properties: + def __init__(self): + self.property_map = dict() + + def add_property(self, property_name: str, property_value: str) -> None: + if property_name not in self.property_map: + self.property_map[property_name] = list() + self.property_map[property_name].append(property_value) + + def serialize_node_properties(self): + serialized_properties = """{""" + for property_name, property_value in self.property_map.items(): + property_name = clean_string(property_name) + property_value = [clean_string(v) for v in property_value] + if is_property(property_name): + serialized_property = "{}: ".format(property_name) + else: + serialized_property = "\"{}\": ".format(property_name) + if len(property_value) == 1: + serialized_property += "\"{}\"".format(str(property_value[0])) + else: + serialized_property += "[" + for value in property_value: + serialized_property += "\"{}\", ".format(str(value)) + serialized_property = serialized_property[:-2] + "]" + serialized_properties += serialized_property + ", " + serialized_properties = serialized_properties[:-2] + "}" + return serialized_properties + + def serialize_qualifier_properties(self): + serialized_properties = ["{"] + for property_name, property_value in self.property_map.items(): + property_name = clean_string(property_name) + property_value = 
[clean_string(v) for v in property_value] + if is_property(property_name): + serialized_properties.append("{}: ".format(property_name)) + else: + serialized_properties.append("\"{}\": ".format(property_name)) + serialized_properties.append("[") + for value in property_value: + if is_item(value) or is_property(value): + serialized_properties.append("{}, ".format(str(value))) + else: + serialized_properties.append("\"{}\", ".format(str(value))) + serialized_properties[-1] = serialized_properties[-1][:-2] + serialized_properties.append("], ") + serialized_properties[-1] = serialized_properties[-1][:-2] + serialized_properties_as_string = ''.join(serialized_properties) + return serialized_properties_as_string + + +class Graph: + def __init__(self): + self.node_store = NodeStore() + self.relationship_store = RelationshipStore() + + def serialize(self, output_directory): + file_name = str(Path(output_directory) / "results.cql") + with open(file_name, 'w', encoding='utf8') as output_file: + for node in self.node_store.serialize(): + output_file.write(node) + output_file.write("\n") + is_first_relationship = True + for relationship in self.relationship_store.serialize(): + if is_first_relationship: + is_first_relationship = False + else: + output_file.write("\t") + output_file.write(relationship) + output_file.write("\n") + + +def is_item(string: str) -> bool: + item_pattern = "^Q[0-9]+$" + match = re.match(item_pattern, string) + if match: + return True + else: + return False + + +def is_property(string: str) -> bool: + property_pattern = "^P[0-9]+$" + match = re.match(property_pattern, string) + if match: + return True + else: + return False + + +def clean_string(string): + return string.strip("\"") + + +def create_graph(statement_file_name: str, qualifier_file_name: str, statement_file_encoding: str, qualifier_file_encoding: str): + # required tsv format headers for statements(unordered): + # id, node1, property, node2 + # or + # node1, property, node2, id, node1_label, node2_label, property_label + # required tsv format headers for qualifiers(unordered): + # node1, property, node2, id + if not statement_file_encoding: + statement_file_encoding = "utf8" + if not qualifier_file_encoding: + qualifier_file_encoding = "utf8" + + graph = Graph() + node_store = graph.node_store + relationship_store = graph.relationship_store + if statement_file_name: + with open(statement_file_name, 'r', encoding=statement_file_encoding) as input_file: + statements = csv.DictReader(input_file, dialect='excel-tab', restval="") + for statement in statements: + src_node = node_store.get_or_create(statement['node1']) + if is_item(statement['node2']): + if statement['property'] == 'P31': + src_node.add_instance_type(statement['node2']) + else: + dst_node = node_store.get_or_create(statement['node2']) + if 'node2_label' in statement and statement['node2_label']: + dst_node.add_property("label", statement["node2_label"]) + relationship = relationship_store.get_or_create(statement['id']) + relationship.add_names(statement['property'], statement['node1'], statement['node2']) + if 'property_label' in statement and statement['property_label']: + relationship.add_property("label", statement["property_label"]) + else: + src_node.add_property(statement['property'], statement['node2']) + if 'node1_label' in statement and statement['node1_label']: + src_node.add_property("label", statement["node1_label"]) + + if qualifier_file_name: + with open(qualifier_file_name, 'r', encoding=qualifier_file_encoding) as input_file: + qualifiers = 
csv.DictReader(input_file, dialect='excel-tab', restval="") + for qualifier in qualifiers: + relationship = relationship_store.get(qualifier['node1']) + if relationship: + relationship.add_property(qualifier['property'], qualifier['node2']) + return graph + + +def parser(): + return {'help': 'Exports data to Neo4J Cypher Query Language statements.'} + + +def add_arguments(parser): + """ + Parse arguments + Args: + parser (argparse.ArgumentParser) + """ + parser.add_argument('-sf', "--statement_file_path", action="store", type=str, dest="statement_file_path", help="Filepath of the statement file", default="") + parser.add_argument('-qf', '--qualifier_file_path', type=str, dest="qualifier_file_path", help="Filepath of the qualifier file", default="") + parser.add_argument('-o', '--output_directory', action="store", type=str, dest='output_directory', help="Directory where the result file will be saved", default="") + parser.add_argument('-se', '--statement_file_encoding', type=str, dest='statement_file_encoding', help="Encoding of the statement file, eg.: utf8", default="") + parser.add_argument('-qe', '--qualifier_file_encoding', type=str, dest='qualifier_file_encoding', help="Encoding of the qualifier file, eg.: utf8", default="") + + +def run(statement_file_path: str, qualifier_file_path: str, output_directory: str, statement_file_encoding: str, qualifier_file_encoding: str): + try: + graph = create_graph(statement_file_path, qualifier_file_path, statement_file_encoding, qualifier_file_encoding) + graph.serialize(output_directory) + except FileNotFoundError as exception: + raise exception + except Exception as ex: + raise ex From d6cf315dd6c90192b00006fb618513610c742b87 Mon Sep 17 00:00:00 2001 From: Divij Bhatia Date: Thu, 23 Apr 2020 23:03:30 -0700 Subject: [PATCH 006/278] fixed bugs --- kgtk/cli/export_neo4j.py | 48 ++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/kgtk/cli/export_neo4j.py b/kgtk/cli/export_neo4j.py index 2b2d6826f..4cc951ad1 100644 --- a/kgtk/cli/export_neo4j.py +++ b/kgtk/cli/export_neo4j.py @@ -22,7 +22,10 @@ def serialize(self, node_id): else: serialized_properties = "" serialized_instance_type = ':'.join(self.instance_type) - serialized_node = "CREATE ({}:{} {})".format(node_id, serialized_instance_type, serialized_properties) + if self.instance_type: + serialized_node = "CREATE ({}:{} {})".format(node_id, serialized_instance_type, serialized_properties) + else: + serialized_node = "CREATE ({} {})".format(node_id, serialized_properties) return serialized_node @@ -107,7 +110,7 @@ def serialize_node_properties(self): if is_property(property_name): serialized_property = "{}: ".format(property_name) else: - serialized_property = "\"{}\": ".format(property_name) + serialized_property = "{}: ".format(property_name) if len(property_value) == 1: serialized_property += "\"{}\"".format(str(property_value[0])) else: @@ -127,7 +130,7 @@ def serialize_qualifier_properties(self): if is_property(property_name): serialized_properties.append("{}: ".format(property_name)) else: - serialized_properties.append("\"{}\": ".format(property_name)) + serialized_properties.append("{}: ".format(property_name)) serialized_properties.append("[") for value in property_value: if is_item(value) or is_property(value): @@ -181,7 +184,20 @@ def is_property(string: str) -> bool: def clean_string(string): - return string.strip("\"") + string = string.strip("\"") + string = string.replace("\"", "'") + return string + + +def clean_label(label): + 
cleaned_label_list = [""] * len(label) + for index, char in enumerate(label): + if char.isalnum(): + cleaned_label_list[index] = label[index] + else: + cleaned_label_list[index] = "_" + cleaned_label = ''.join(cleaned_label_list) + return cleaned_label def create_graph(statement_file_name: str, qualifier_file_name: str, statement_file_encoding: str, qualifier_file_encoding: str): @@ -192,9 +208,9 @@ def create_graph(statement_file_name: str, qualifier_file_name: str, statement_f # required tsv format headers for qualifiers(unordered): # node1, property, node2, id if not statement_file_encoding: - statement_file_encoding = "utf8" + statement_file_encoding = "UTF-8" if not qualifier_file_encoding: - qualifier_file_encoding = "utf8" + qualifier_file_encoding = "UTF-8" graph = Graph() node_store = graph.node_store @@ -203,20 +219,20 @@ def create_graph(statement_file_name: str, qualifier_file_name: str, statement_f with open(statement_file_name, 'r', encoding=statement_file_encoding) as input_file: statements = csv.DictReader(input_file, dialect='excel-tab', restval="") for statement in statements: - src_node = node_store.get_or_create(statement['node1']) + src_node = node_store.get_or_create(clean_label(statement['node1'])) if is_item(statement['node2']): if statement['property'] == 'P31': - src_node.add_instance_type(statement['node2']) + src_node.add_instance_type(clean_label(statement['node2'])) else: - dst_node = node_store.get_or_create(statement['node2']) + dst_node = node_store.get_or_create(clean_label(statement['node2'])) if 'node2_label' in statement and statement['node2_label']: dst_node.add_property("label", statement["node2_label"]) - relationship = relationship_store.get_or_create(statement['id']) - relationship.add_names(statement['property'], statement['node1'], statement['node2']) + relationship = relationship_store.get_or_create(clean_label(statement['id'])) + relationship.add_names(clean_label(statement['property']), clean_label(statement['node1']), clean_label(statement['node2'])) if 'property_label' in statement and statement['property_label']: relationship.add_property("label", statement["property_label"]) else: - src_node.add_property(statement['property'], statement['node2']) + src_node.add_property(clean_label(statement['property']), statement['node2']) if 'node1_label' in statement and statement['node1_label']: src_node.add_property("label", statement["node1_label"]) @@ -224,9 +240,9 @@ def create_graph(statement_file_name: str, qualifier_file_name: str, statement_f with open(qualifier_file_name, 'r', encoding=qualifier_file_encoding) as input_file: qualifiers = csv.DictReader(input_file, dialect='excel-tab', restval="") for qualifier in qualifiers: - relationship = relationship_store.get(qualifier['node1']) + relationship = relationship_store.get(clean_label(qualifier['node1'])) if relationship: - relationship.add_property(qualifier['property'], qualifier['node2']) + relationship.add_property(clean_label(qualifier['property']), qualifier['node2']) return graph @@ -243,8 +259,8 @@ def add_arguments(parser): parser.add_argument('-sf', "--statement_file_path", action="store", type=str, dest="statement_file_path", help="Filepath of the statement file", default="") parser.add_argument('-qf', '--qualifier_file_path', type=str, dest="qualifier_file_path", help="Filepath of the qualifier file", default="") parser.add_argument('-o', '--output_directory', action="store", type=str, dest='output_directory', help="Directory where the result file will be saved", default="") - 
parser.add_argument('-se', '--statement_file_encoding', type=str, dest='statement_file_encoding', help="Encoding of the statement file, eg.: utf8", default="") - parser.add_argument('-qe', '--qualifier_file_encoding', type=str, dest='qualifier_file_encoding', help="Encoding of the qualifier file, eg.: utf8", default="") + parser.add_argument('-se', '--statement_file_encoding', type=str, dest='statement_file_encoding', help="Encoding of the statement file, eg.: UTF-8", default="") + parser.add_argument('-qe', '--qualifier_file_encoding', type=str, dest='qualifier_file_encoding', help="Encoding of the qualifier file, eg.: UTF-8", default="") def run(statement_file_path: str, qualifier_file_path: str, output_directory: str, statement_file_encoding: str, qualifier_file_encoding: str): From 8886320a3f2c0f7272b194ca4439d5c02e0ef19b Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Mon, 27 Apr 2020 17:30:38 -0700 Subject: [PATCH 007/278] Better handling of feedback messages. --- kgtk/cli/clean_data.py | 34 +++++++++++++++++++--------------- kgtk/cli/validate.py | 14 ++++++++------ 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/kgtk/cli/clean_data.py b/kgtk/cli/clean_data.py index 9a89ff97d..5fdb9dcd4 100644 --- a/kgtk/cli/clean_data.py +++ b/kgtk/cli/clean_data.py @@ -57,8 +57,11 @@ def add_arguments(parser): help="The action to take when an empty line is detected.", type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - parser.add_argument( "--errors-to-stdout", dest="errors_to_stdout", - help="Send errors to stdout instead of stderr", action="store_true") + errors_to = parser.add_mutually_exclusive_group() + errors_to.add_argument( "--errors-to-stdout", dest="errors_to_stdout", + help="Send errors to stdout instead of stderr (default)", action="store_true") + errors_to.add_argument( "--errors-to-stderr", dest="errors_to_stderr", + help="Send errors to stderr instead of stdout", action="store_true") parser.add_argument( "--error-limit", dest="error_limit", help="The maximum number of errors to report before failing", type=int, default=KgtkReader.ERROR_LIMIT_DEFAULT) @@ -141,19 +144,20 @@ def run(input_file: typing.Optional[Path], # import modules locally from kgtk.exceptions import KGTKException - try: - if verbose: - if input_file is not None: - print("Cleaning data from '%s'" % str(input_file), file=sys.stderr) - else: - print ("Cleaning data from stdin", file=sys.stderr) - if output_file is not None: - print("Writing data to '%s'" % str(output_file), file=sys.stderr) - else: - print ("Writing data to stdin", file=sys.stderr) + # Select where to send error messages, defaulting to stderr. 
+ error_file: typing.TextIO = sys.stdout if errors_to_stdout else sys.stderr + + if verbose: + if input_file is not None: + print("Cleaning data from '%s'" % str(input_file), file=error_file) + else: + print ("Cleaning data from stdin", file=error_file) + if output_file is not None: + print("Writing data to '%s'" % str(output_file), file=error_file) + else: + print ("Writing data to stdin", file=error_file) - error_file: typing.TextIO = sys.stdout if errors_to_stdout else sys.stderr - + try: kr: KgtkReader = KgtkReader.open(input_file, force_column_names=force_column_names, skip_first_record=skip_first_record, @@ -195,7 +199,7 @@ def run(input_file: typing.Optional[Path], kw.close() if verbose: - print("Copied %d clean data lines" % line_count, file=sys.stderr) + print("Copied %d clean data lines" % line_count, file=error_file) return 0 except Exception as e: diff --git a/kgtk/cli/validate.py b/kgtk/cli/validate.py index e027ef03e..ae480b223 100644 --- a/kgtk/cli/validate.py +++ b/kgtk/cli/validate.py @@ -148,16 +148,18 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], if kgtk_files is None or len(kgtk_files) == 0: kgtk_files = [ None ] + # Select where to send error messages, defaulting to stderr. + error_file: typing.TextIO = sys.stderr if errors_to_stderr else sys.stdout + try: kgtk_file: typing.Optional[Path] for kgtk_file in kgtk_files: if verbose: + print("\n====================================================") if kgtk_file is not None: - print("Validating '%s'" % str(kgtk_file)) + print("Validating '%s'" % str(kgtk_file), file=error_file) else: - print ("Validating from stdin") - - error_file: typing.TextIO = sys.stderr if errors_to_stderr else sys.stdout + print ("Validating from stdin", file=error_file) kr: KgtkReader = KgtkReader.open(kgtk_file, force_column_names=force_column_names, @@ -186,14 +188,14 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], if header_only: kr.close() if verbose: - print("Validated the header only.") + print("Validated the header only.", file=error_file) else: line_count: int = 0 row: typing.List[str] for row in kr: line_count += 1 if verbose: - print("Validated %d data lines" % line_count) + print("Validated %d data lines" % line_count, file=error_file) return 0 except SystemExit as e: From 753365dd85585275c603e71e781ddf1f105c4ca5 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Mon, 27 Apr 2020 17:46:58 -0700 Subject: [PATCH 008/278] Add attrs to the list of required modules. 
--- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index d25b0a5ee..c467a6c44 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,4 @@ tqdm etk simplejson pyrallel.lib +attrs From 04c2300b546e1af87a2a63c5fa5386dd19f88094 Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Mon, 27 Apr 2020 19:14:24 -0700 Subject: [PATCH 009/278] read line by line, reproduced the missing prefix bugs --- kgtk/cli/generate_wikidata_triples.py | 12 +++++++++--- requirements.txt | 3 ++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/kgtk/cli/generate_wikidata_triples.py b/kgtk/cli/generate_wikidata_triples.py index 573bcf6ed..b2eab47c0 100644 --- a/kgtk/cli/generate_wikidata_triples.py +++ b/kgtk/cli/generate_wikidata_triples.py @@ -535,9 +535,15 @@ def replaceIllegalString(s:str)->str: truthy=truthy ) # process stdin - for num, edge in enumerate(sys.stdin.readlines()): - if edge.startswith("#") or num == 0: # TODO First line omit + num_line = 0 + while True: + edge = sys.stdin.readline() + if not edge: + break + if edge.startswith("#") or num_line == 0: # TODO First line omit + num_line += 1 continue else: - generator.entryPoint(num, edge) + generator.entryPoint(num_line, edge) + num_line += 1 generator.finalize() diff --git a/requirements.txt b/requirements.txt index a6817e3d1..7cb0d1c7f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,7 @@ sh sklearn SPARQLWrapper tqdm -etk +rdflib==5.0.0 +etk==2.2.1 simplejson pyrallel.lib From 0027ae478c31b19c2f25739ac43385d843ced422 Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Mon, 27 Apr 2020 19:28:20 -0700 Subject: [PATCH 010/278] separate the cli file and the class file --- kgtk/cli/generate_wikidata_triples.py | 419 +------------------------- kgtk/triple_generator.py | 419 ++++++++++++++++++++++++++ 2 files changed, 420 insertions(+), 418 deletions(-) create mode 100644 kgtk/triple_generator.py diff --git a/kgtk/cli/generate_wikidata_triples.py b/kgtk/cli/generate_wikidata_triples.py index b2eab47c0..880f92e3b 100644 --- a/kgtk/cli/generate_wikidata_triples.py +++ b/kgtk/cli/generate_wikidata_triples.py @@ -106,425 +106,8 @@ def run( # logging_level:str ): # import modules locally + from kgtk.triple_generator import TripleGenerator import sys - import warnings - import re - import requests - from typing import TextIO - import logging - from etk.wikidata.entity import WDItem, WDProperty - from kgtk.exceptions import KGTKException - - class TripleGenerator: - """ - A class to maintain the status of the generator - """ - def __init__( - self, - propFile: str, - labelSet: str, - aliasSet: str, - descriptionSet: str, - ignore: bool, - n: int, - destFp: TextIO = sys.stdout, - truthy:bool =False - ): - from etk.wikidata.statement import Rank - from etk.etk import ETK - from etk.knowledge_graph import KGSchema - from etk.etk_module import ETKModule - self.ignore = ignore - self.propTypes = self.__setPropTypes(propFile) - self.labelSet, self.aliasSet, self.descriptionSet = self.__setSets( - labelSet, aliasSet, descriptionSet - ) - self.fp = destFp - self.n = int(n) - self.read = 0 - # ignore-logging, if not ignore, log them and move on. 
- if not self.ignore: - self.ignoreFile = open("ignored.log","w") - # corrupted statement id - self.corrupted_statement_id = None - # serialize prfix - kg_schema = KGSchema() - kg_schema.add_schema("@prefix : .", "ttl") - self.etk = ETK(kg_schema=kg_schema, modules=ETKModule) - self.doc = self.__setDoc() - self.__serialize_prefix() - - def _node_2_entity(self, node:str): - ''' - A node can be Qxxx or Pxxx, return the proper entity. - ''' - if node in self.propTypes: - entity = WDProperty(node, self.propTypes[node]) - else: - entity = WDItem(TripleGenerator.replaceIllegalString(node.upper())) - return entity - - - def __setPropTypes(self, propFile: str): - from etk.wikidata.value import ( - Item, - StringValue, - TimeValue, - QuantityValue, - MonolingualText, - GlobeCoordinate, - ExternalIdentifier, - URLValue - ) - dataTypeMappings = { - "item": Item, - "time": TimeValue, - "globe-coordinate": GlobeCoordinate, - "quantity": QuantityValue, - "monolingualtext": MonolingualText, - "string": StringValue, - "external-identifier":ExternalIdentifier, - "url":URLValue - } - with open(propFile, "r") as fp: - props = fp.readlines() - __propTypes = {} - for line in props[1:]: - node1, _, node2 = line.split("\t") - try: - __propTypes[node1] = dataTypeMappings[node2.strip()] - except: - if not self.ignore: - raise KGTKException( - "DataType {} of node {} is not supported.\n".format( - node2, node1 - ) - ) - return __propTypes - - def __setSets(self, labelSet: str, aliasSet: str, descriptionSet: str): - return ( - set(labelSet.split(",")), - set(aliasSet.split(",")), - set(descriptionSet.split(",")), - ) - - def __setDoc(self, doc_id: str = "http://isi.edu/default-ns/projects"): - """ - reset the doc object and return it. Called at initialization and after outputting triples. 
- """ - doc = self.etk.create_document({}, doc_id=doc_id) - # bind prefixes - doc.kg.bind("wikibase", "http://wikiba.se/ontology#") - doc.kg.bind("wd", "http://www.wikidata.org/entity/") - doc.kg.bind("wdt", "http://www.wikidata.org/prop/direct/") - doc.kg.bind("wdtn", "http://www.wikidata.org/prop/direct-normalized/") - doc.kg.bind("wdno", "http://www.wikidata.org/prop/novalue/") - doc.kg.bind("wds", "http://www.wikidata.org/entity/statement/") - doc.kg.bind("wdv", "http://www.wikidata.org/value/") - doc.kg.bind("wdref", "http://www.wikidata.org/reference/") - doc.kg.bind("p", "http://www.wikidata.org/prop/") - doc.kg.bind("pr", "http://www.wikidata.org/prop/reference/") - doc.kg.bind("prv", "http://www.wikidata.org/prop/reference/value/") - doc.kg.bind( - "prn", "http://www.wikidata.org/prop/reference/value-normalized/" - ) - doc.kg.bind("ps", "http://www.wikidata.org/prop/statement/") - doc.kg.bind("psv", "http://www.wikidata.org/prop/statement/value/") - doc.kg.bind( - "psn", "http://www.wikidata.org/prop/statement/value-normalized/" - ) - doc.kg.bind("pq", "http://www.wikidata.org/prop/qualifier/") - doc.kg.bind("pqv", "http://www.wikidata.org/prop/qualifier/value/") - doc.kg.bind( - "pqn", "http://www.wikidata.org/prop/qualifier/value-normalized/" - ) - doc.kg.bind("skos", "http://www.w3.org/2004/02/skos/core#") - doc.kg.bind("prov", "http://www.w3.org/ns/prov#") - doc.kg.bind("schema", "http://schema.org/") - return doc - - @staticmethod - def _process_text_string(string:str)->[str,str]: - ''' - ''' - if "@" in string: - res = string.split("@") - textString = "@".join(res[:-1]).replace('"', "").replace("'", "") - lang = res[-1].replace('"','').replace("'","") - if len(lang) != 2: - lang = "en" - else: - textString = string.replace('"', "").replace("'", "") - lang = "en" - return [textString, lang] - - def genLabelTriple(self, node1: str, label: str, node2: str) -> bool: - entity = self._node_2_entity(node1) - textString, lang = TripleGenerator._process_text_string(node2) - entity.add_label(textString, lang=lang) - self.doc.kg.add_subject(entity) - return True - - def genDescriptionTriple(self, node1: str, label: str, node2: str) -> bool: - entity = self._node_2_entity(node1) - textString, lang = TripleGenerator._process_text_string(node2) - entity.add_description(textString, lang=lang) - self.doc.kg.add_subject(entity) - return True - - def genDescriptionTriple(self, node1: str, label: str, node2: str) -> bool: - entity = self._node_2_entity(node1) - textString, lang = TripleGenerator._process_text_string(node2) - entity.add_description(textString, lang=lang) - self.doc.kg.add_subject(entity) - return True - - def genAliasTriple(self, node1: str, label: str, node2: str) -> bool: - entity = self._node_2_entity(node1) - textString, lang = TripleGenerator._process_text_string(node2) - entity.add_alias(textString, lang=lang) - self.doc.kg.add_subject(entity) - return True - - def genPropDeclarationTriple(self, node1: str, label: str, node2: str) -> bool: - prop = WDProperty(node1, self.propTypes[node1]) - self.doc.kg.add_subject(prop) - return True - - def genNormalTriple( - self, node1: str, label: str, node2: str, isQualifierEdge: bool) -> bool: - from etk.wikidata.value import ( - Item, - StringValue, - TimeValue, - QuantityValue, - MonolingualText, - GlobeCoordinate, - ExternalIdentifier, - URLValue, - Precision - ) - - entity = self._node_2_entity(node1) - # determine the edge type - edgeType = self.propTypes[label] - if edgeType == Item: - OBJECT = 
WDItem(TripleGenerator.replaceIllegalString(node2.upper())) - elif edgeType == TimeValue: - # https://www.wikidata.org/wiki/Help:Dates - # ^2013-01-01T00:00:00Z/11 - # ^8000000-00-00T00:00:00Z/3 - if re.compile("[0-9]{4}").match(node2): - try: - dateTimeString = node2 + "-01-01" - OBJECT = TimeValue( - value=dateTimeString, #TODO - calendar=Item("Q1985727"), - precision=Precision.year, - time_zone=0, - ) - except: - return False - else: - try: - dateTimeString, precision = node2[1:].split("/") - dateTimeString = dateTimeString[:-1] # remove "Z" - # 2016-00-00T00:00:00 case - if "-00-00" in dateTimeString: - dateTimeString = "-01-01".join(dateTimeString.split("-00-00")) - elif dateTimeString[8:10] == "00": - dateTimeString = dateTimeString[:8]+"01" + dateTimeString[10:] - OBJECT = TimeValue( - value=dateTimeString, - calendar=Item("Q1985727"), - precision=precision, - time_zone=0, - ) - except: - return False - - #TODO other than that, not supported. Creation of normal triple fails - - - elif edgeType == GlobeCoordinate: - latitude, longitude = node2[1:].split("/") - OBJECT = GlobeCoordinate( - latitude, longitude, 0.0001, globe=StringValue("Earth") - ) - - elif edgeType == QuantityValue: - # +70[+60,+80]Q743895 - res = re.compile("([\+|\-]?[0-9]+\.?[0-9]*)(?:\[([\+|\-]?[0-9]+\.?[0-9]*),([\+|\-]?[0-9]+\.?[0-9]*)\])?([U|Q](?:[0-9]+))?").match(node2).groups() - amount, lower_bound, upper_bound, unit = res - - # Handle extra small numbers for now. TODO - if TripleGenerator._is_invalid_decimal_string(amount) or TripleGenerator._is_invalid_decimal_string(lower_bound) or TripleGenerator._is_invalid_decimal_string(upper_bound): - return False - amount = TripleGenerator._clean_number_string(amount) - lower_bound = TripleGenerator._clean_number_string(lower_bound) - upper_bound = TripleGenerator._clean_number_string(upper_bound) - if unit != None: - if upper_bound != None and lower_bound != None: - OBJECT = QuantityValue(amount, unit=Item(unit),upper_bound=upper_bound,lower_bound=lower_bound) - else: - OBJECT = QuantityValue(amount, unit=Item(unit)) - else: - if upper_bound != None and lower_bound != None: - OBJECT = QuantityValue(amount, upper_bound=upper_bound,lower_bound=lower_bound) - else: - OBJECT = QuantityValue(amount) - elif edgeType == MonolingualText: - textString, lang = TripleGenerator._process_text_string(node2) - OBJECT = MonolingualText(textString, lang) - elif edgeType == ExternalIdentifier: - OBJECT = ExternalIdentifier(node2) - elif edge == URLValue: - OBJECT = URLValue(node2) - else: - # treat everything else as stringValue - OBJECT = StringValue(node2) - if isQualifierEdge: - # edge: e8 p9 ^2013-01-01T00:00:00Z/11 - # create qualifier edge on previous STATEMENT and return the updated STATEMENT - if type(OBJECT) == WDItem: - self.doc.kg.add_subject(OBJECT) - self.STATEMENT.add_qualifier(label.upper(), OBJECT) - self.doc.kg.add_subject(self.STATEMENT) #TODO maybe can be positioned better for the edge cases. 
- - else: - # edge: q1 p8 q2 e8 - # create brand new property edge and replace STATEMENT - if type(OBJECT) == WDItem: - self.doc.kg.add_subject(OBJECT) - if truthy: - self.STATEMENT = entity.add_truthy_statement(label.upper(), OBJECT) - else: - self.STATEMENT = entity.add_statement(label.upper(), OBJECT) - self.doc.kg.add_subject(entity) - return True - - @staticmethod - def _is_invalid_decimal_string(num_string): - ''' - if a decimal string too small, return True TODO - ''' - if num_string == None: - return False - else: - if abs(float(num_string)) < 0.0001 and float(num_string) != 0: - return True - return False - - @staticmethod - def _clean_number_string(num): - from numpy import format_float_positional - if num == None: - return None - else: - return format_float_positional(float(num),trim="-") - - def entryPoint(self, line_number:int , edge: str): - """ - generates a list of two, the first element is the determination of the edge type using corresponding edge type - the second element is a bool indicating whether this is a valid property edge or qualifier edge. - Call corresponding downstream functions - """ - edgeList = edge.strip().split("\t") - l = len(edgeList) - if l!=4: - return - - [node1, label, node2, eID] = edgeList - node1, label, node2, eID = node1.strip(),label.strip(),node2.strip(),eID.strip() - if line_number == 0: #TODO ignore header mode - # by default a statement edge - isQualifierEdge = False - # print("#Debug Info: ",line_number, self.ID, eID, isQualifierEdge,self.STATEMENT) - self.ID = eID - self.corrupted_statement_id = None - else: - if node1 != self.ID: - # also a new statement edge - if self.read >= self.n: - self.serialize() - isQualifierEdge = False - # print("#Debug Info: ",line_number, self.ID, node1, isQualifierEdge,self.STATEMENT) - self.ID= eID - self.corrupted_statement_id = None - else: - # qualifier edge or property declaration edge - isQualifierEdge = True - if self.corrupted_statement_id == eID: - # Met a qualifier which associates with a corrupted statement - return - if label != "type" and node1 != self.ID: - # 1. not a property declaration edge and - # 2. the current qualifier's node1 is not the latest property edge id, throw errors. - if not self.ignore: - raise KGTKException( - "Node1 {} at line {} doesn't agree with latest property edge id {}.\n".format( - node1, line_number, self.ID - ) - ) - if label in self.labelSet: - success = self.genLabelTriple(node1, label, node2) - elif label in self.descriptionSet: - success= self.genDescriptionTriple(node1, label, node2) - elif label in self.aliasSet: - success = self.genAliasTriple(node1, label, node2) - elif label == "type": - # special edge of prop declaration - success = self.genPropDeclarationTriple(node1, label, node2) - else: - if label in self.propTypes: - success= self.genNormalTriple(node1, label, node2, isQualifierEdge) - else: - if not self.ignore: - raise KGTKException( - "property {}'s type is unknown at line {}.\n".format(label, line_number) - ) - success = False - if (not success) and (not isQualifierEdge) and (not self.ignore): - # We have a corrupted edge here. - self.ignoreFile.write("Corrupted statement at line number: {} with id {} with current corrupted id {}\n".format(line_number, eID, self.corrupted_statement_id)) - self.ignoreFile.flush() - self.corrupted_statement_id = eID - else: - self.read += 1 - self.corrupted_statement_id = None - - def serialize(self): - """ - Seriealize the triples. Used a hack to avoid serializing the prefix again. 
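The truthy flag above decides which of two RDF shapes a statement takes. As an illustration (hand-written here, not output of this exact code), the Wikidata RDF model distinguishes them roughly like this:

    # Truthy: a single direct wdt: triple; no statement node, so no qualifiers.
    truthy_form = "wd:Q42 wdt:P69 wd:Q691283 ."

    # Full: an intermediate wds: statement node that qualifier edges attach to.
    full_form = [
        "wd:Q42 p:P69 wds:Q42-P69-1 .",
        "wds:Q42-P69-1 ps:P69 wd:Q691283 .",
        "wds:Q42-P69-1 pq:P580 '1979-01-01T00:00:00Z' .",  # qualifier, simplified
    ]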
- """ - docs = self.etk.process_ems(self.doc) - self.fp.write("\n\n".join(docs[0].kg.serialize("ttl").split("\n\n")[1:])) - self.fp.flush() - self.__reset() - - def __serialize_prefix(self): - """ - This function should be called only once after the doc object is initialized. - """ - docs = self.etk.process_ems(self.doc) - self.fp.write(docs[0].kg.serialize("ttl").split("\n\n")[0] + "\n\n") - self.fp.flush() - self.__reset() - - def __reset(self): - self.ID = None - self.STATEMENT = None - self.read = 0 - self.doc = self.__setDoc() - - def finalize(self): - self.serialize() - - @staticmethod - def replaceIllegalString(s:str)->str: - return s.replace(":","-") - generator = TripleGenerator( propFile=propFile, labelSet=labels, diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py new file mode 100644 index 000000000..9d86ae35d --- /dev/null +++ b/kgtk/triple_generator.py @@ -0,0 +1,419 @@ +import sys +import re +from typing import TextIO +from kgtk.exceptions import KGTKException +from etk.wikidata.entity import WDItem, WDProperty + +class TripleGenerator: + """ + A class to maintain the status of the generator + """ + def __init__( + self, + propFile: str, + labelSet: str, + aliasSet: str, + descriptionSet: str, + ignore: bool, + n: int, + destFp: TextIO = sys.stdout, + truthy:bool =False + ): + + import logging + from etk.wikidata.statement import Rank + from etk.etk import ETK + from etk.knowledge_graph import KGSchema + from etk.etk_module import ETKModule + self.ignore = ignore + self.propTypes = self.__setPropTypes(propFile) + self.labelSet, self.aliasSet, self.descriptionSet = self.__setSets( + labelSet, aliasSet, descriptionSet + ) + self.fp = destFp + self.n = int(n) + self.read = 0 + # ignore-logging, if not ignore, log them and move on. + if not self.ignore: + self.ignoreFile = open("ignored.log","w") + # corrupted statement id + self.corrupted_statement_id = None + # truthy + self.truthy = truthy + # serialize prfix + kg_schema = KGSchema() + kg_schema.add_schema("@prefix : .", "ttl") + self.etk = ETK(kg_schema=kg_schema, modules=ETKModule) + self.doc = self.__setDoc() + self.__serialize_prefix() + + def _node_2_entity(self, node:str): + ''' + A node can be Qxxx or Pxxx, return the proper entity. 
+ ''' + if node in self.propTypes: + entity = WDProperty(node, self.propTypes[node]) + else: + entity = WDItem(TripleGenerator.replaceIllegalString(node.upper())) + return entity + + + def __setPropTypes(self, propFile: str): + from etk.wikidata.value import ( + Item, + StringValue, + TimeValue, + QuantityValue, + MonolingualText, + GlobeCoordinate, + ExternalIdentifier, + URLValue + ) + dataTypeMappings = { + "item": Item, + "time": TimeValue, + "globe-coordinate": GlobeCoordinate, + "quantity": QuantityValue, + "monolingualtext": MonolingualText, + "string": StringValue, + "external-identifier":ExternalIdentifier, + "url":URLValue + } + with open(propFile, "r") as fp: + props = fp.readlines() + __propTypes = {} + for line in props[1:]: + node1, _, node2 = line.split("\t") + try: + __propTypes[node1] = dataTypeMappings[node2.strip()] + except: + if not self.ignore: + raise KGTKException( + "DataType {} of node {} is not supported.\n".format( + node2, node1 + ) + ) + return __propTypes + + def __setSets(self, labelSet: str, aliasSet: str, descriptionSet: str): + return ( + set(labelSet.split(",")), + set(aliasSet.split(",")), + set(descriptionSet.split(",")), + ) + + def __setDoc(self, doc_id: str = "http://isi.edu/default-ns/projects"): + """ + reset the doc object and return it. Called at initialization and after outputting triples. + """ + doc = self.etk.create_document({}, doc_id=doc_id) + # bind prefixes + doc.kg.bind("wikibase", "http://wikiba.se/ontology#") + doc.kg.bind("wd", "http://www.wikidata.org/entity/") + doc.kg.bind("wdt", "http://www.wikidata.org/prop/direct/") + doc.kg.bind("wdtn", "http://www.wikidata.org/prop/direct-normalized/") + doc.kg.bind("wdno", "http://www.wikidata.org/prop/novalue/") + doc.kg.bind("wds", "http://www.wikidata.org/entity/statement/") + doc.kg.bind("wdv", "http://www.wikidata.org/value/") + doc.kg.bind("wdref", "http://www.wikidata.org/reference/") + doc.kg.bind("p", "http://www.wikidata.org/prop/") + doc.kg.bind("pr", "http://www.wikidata.org/prop/reference/") + doc.kg.bind("prv", "http://www.wikidata.org/prop/reference/value/") + doc.kg.bind( + "prn", "http://www.wikidata.org/prop/reference/value-normalized/" + ) + doc.kg.bind("ps", "http://www.wikidata.org/prop/statement/") + doc.kg.bind("psv", "http://www.wikidata.org/prop/statement/value/") + doc.kg.bind( + "psn", "http://www.wikidata.org/prop/statement/value-normalized/" + ) + doc.kg.bind("pq", "http://www.wikidata.org/prop/qualifier/") + doc.kg.bind("pqv", "http://www.wikidata.org/prop/qualifier/value/") + doc.kg.bind( + "pqn", "http://www.wikidata.org/prop/qualifier/value-normalized/" + ) + doc.kg.bind("skos", "http://www.w3.org/2004/02/skos/core#") + doc.kg.bind("prov", "http://www.w3.org/ns/prov#") + doc.kg.bind("schema", "http://schema.org/") + return doc + + @staticmethod + def _process_text_string(string:str)->[str,str]: + ''' + ''' + if "@" in string: + res = string.split("@") + textString = "@".join(res[:-1]).replace('"', "").replace("'", "") + lang = res[-1].replace('"','').replace("'","") + if len(lang) != 2: + lang = "en" + else: + textString = string.replace('"', "").replace("'", "") + lang = "en" + return [textString, lang] + + def genLabelTriple(self, node1: str, label: str, node2: str) -> bool: + entity = self._node_2_entity(node1) + textString, lang = TripleGenerator._process_text_string(node2) + entity.add_label(textString, lang=lang) + self.doc.kg.add_subject(entity) + return True + + def genDescriptionTriple(self, node1: str, label: str, node2: str) -> bool: + entity = 
self._node_2_entity(node1) + textString, lang = TripleGenerator._process_text_string(node2) + entity.add_description(textString, lang=lang) + self.doc.kg.add_subject(entity) + return True + + def genDescriptionTriple(self, node1: str, label: str, node2: str) -> bool: + entity = self._node_2_entity(node1) + textString, lang = TripleGenerator._process_text_string(node2) + entity.add_description(textString, lang=lang) + self.doc.kg.add_subject(entity) + return True + + def genAliasTriple(self, node1: str, label: str, node2: str) -> bool: + entity = self._node_2_entity(node1) + textString, lang = TripleGenerator._process_text_string(node2) + entity.add_alias(textString, lang=lang) + self.doc.kg.add_subject(entity) + return True + + def genPropDeclarationTriple(self, node1: str, label: str, node2: str) -> bool: + prop = WDProperty(node1, self.propTypes[node1]) + self.doc.kg.add_subject(prop) + return True + + def genNormalTriple( + self, node1: str, label: str, node2: str, isQualifierEdge: bool) -> bool: + from etk.wikidata.value import ( + Item, + StringValue, + TimeValue, + QuantityValue, + MonolingualText, + GlobeCoordinate, + ExternalIdentifier, + URLValue, + Precision + ) + + entity = self._node_2_entity(node1) + # determine the edge type + edgeType = self.propTypes[label] + if edgeType == Item: + OBJECT = WDItem(TripleGenerator.replaceIllegalString(node2.upper())) + elif edgeType == TimeValue: + # https://www.wikidata.org/wiki/Help:Dates + # ^2013-01-01T00:00:00Z/11 + # ^8000000-00-00T00:00:00Z/3 + if re.compile("[0-9]{4}").match(node2): + try: + dateTimeString = node2 + "-01-01" + OBJECT = TimeValue( + value=dateTimeString, #TODO + calendar=Item("Q1985727"), + precision=Precision.year, + time_zone=0, + ) + except: + return False + else: + try: + dateTimeString, precision = node2[1:].split("/") + dateTimeString = dateTimeString[:-1] # remove "Z" + # 2016-00-00T00:00:00 case + if "-00-00" in dateTimeString: + dateTimeString = "-01-01".join(dateTimeString.split("-00-00")) + elif dateTimeString[8:10] == "00": + dateTimeString = dateTimeString[:8]+"01" + dateTimeString[10:] + OBJECT = TimeValue( + value=dateTimeString, + calendar=Item("Q1985727"), + precision=precision, + time_zone=0, + ) + except: + return False + + #TODO other than that, not supported. Creation of normal triple fails + + + elif edgeType == GlobeCoordinate: + latitude, longitude = node2[1:].split("/") + OBJECT = GlobeCoordinate( + latitude, longitude, 0.0001, globe=StringValue("Earth") + ) + + elif edgeType == QuantityValue: + # +70[+60,+80]Q743895 + res = re.compile("([\+|\-]?[0-9]+\.?[0-9]*)(?:\[([\+|\-]?[0-9]+\.?[0-9]*),([\+|\-]?[0-9]+\.?[0-9]*)\])?([U|Q](?:[0-9]+))?").match(node2).groups() + amount, lower_bound, upper_bound, unit = res + + # Handle extra small numbers for now. 
TODO + if TripleGenerator._is_invalid_decimal_string(amount) or TripleGenerator._is_invalid_decimal_string(lower_bound) or TripleGenerator._is_invalid_decimal_string(upper_bound): + return False + amount = TripleGenerator._clean_number_string(amount) + lower_bound = TripleGenerator._clean_number_string(lower_bound) + upper_bound = TripleGenerator._clean_number_string(upper_bound) + if unit != None: + if upper_bound != None and lower_bound != None: + OBJECT = QuantityValue(amount, unit=Item(unit),upper_bound=upper_bound,lower_bound=lower_bound) + else: + OBJECT = QuantityValue(amount, unit=Item(unit)) + else: + if upper_bound != None and lower_bound != None: + OBJECT = QuantityValue(amount, upper_bound=upper_bound,lower_bound=lower_bound) + else: + OBJECT = QuantityValue(amount) + elif edgeType == MonolingualText: + textString, lang = TripleGenerator._process_text_string(node2) + OBJECT = MonolingualText(textString, lang) + elif edgeType == ExternalIdentifier: + OBJECT = ExternalIdentifier(node2) + elif edgeType == URLValue: + OBJECT = URLValue(node2) + else: + # treat everything else as stringValue + OBJECT = StringValue(node2) + if isQualifierEdge: + # edge: e8 p9 ^2013-01-01T00:00:00Z/11 + # create qualifier edge on previous STATEMENT and return the updated STATEMENT + if type(OBJECT) == WDItem: + self.doc.kg.add_subject(OBJECT) + self.STATEMENT.add_qualifier(label.upper(), OBJECT) + self.doc.kg.add_subject(self.STATEMENT) #TODO maybe can be positioned better for the edge cases. + + else: + # edge: q1 p8 q2 e8 + # create brand new property edge and replace STATEMENT + if type(OBJECT) == WDItem: + self.doc.kg.add_subject(OBJECT) + if self.truthy: + self.STATEMENT = entity.add_truthy_statement(label.upper(), OBJECT) + else: + self.STATEMENT = entity.add_statement(label.upper(), OBJECT) + self.doc.kg.add_subject(entity) + return True + + @staticmethod + def _is_invalid_decimal_string(num_string): + ''' + if a decimal string too small, return True TODO + ''' + if num_string == None: + return False + else: + if abs(float(num_string)) < 0.0001 and float(num_string) != 0: + return True + return False + + @staticmethod + def _clean_number_string(num): + from numpy import format_float_positional + if num == None: + return None + else: + return format_float_positional(float(num),trim="-") + + def entryPoint(self, line_number:int , edge: str): + """ + generates a list of two, the first element is the determination of the edge type using corresponding edge type + the second element is a bool indicating whether this is a valid property edge or qualifier edge. 
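The quantity pattern used a few lines above is dense; this standalone demo shows what it extracts from the documented example +70[+60,+80]Q743895, namely (amount, lower bound, upper bound, unit), with the bracketed bounds and the unit both optional:

    import re

    QUANTITY_PATTERN = re.compile(
        r"([\+|\-]?[0-9]+\.?[0-9]*)"
        r"(?:\[([\+|\-]?[0-9]+\.?[0-9]*),([\+|\-]?[0-9]+\.?[0-9]*)\])?"
        r"([U|Q](?:[0-9]+))?")

    print(QUANTITY_PATTERN.match("+70[+60,+80]Q743895").groups())
    # ('+70', '+60', '+80', 'Q743895')
    print(QUANTITY_PATTERN.match("12.5").groups())
    # ('12.5', None, None, None)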
+ Call corresponding downstream functions + """ + edgeList = edge.strip().split("\t") + l = len(edgeList) + if l!=4: + return + + [node1, label, node2, eID] = edgeList + node1, label, node2, eID = node1.strip(),label.strip(),node2.strip(),eID.strip() + if line_number == 0: #TODO ignore header mode + # by default a statement edge + isQualifierEdge = False + # print("#Debug Info: ",line_number, self.ID, eID, isQualifierEdge,self.STATEMENT) + self.ID = eID + self.corrupted_statement_id = None + else: + if node1 != self.ID: + # also a new statement edge + if self.read >= self.n: + self.serialize() + isQualifierEdge = False + # print("#Debug Info: ",line_number, self.ID, node1, isQualifierEdge,self.STATEMENT) + self.ID= eID + self.corrupted_statement_id = None + else: + # qualifier edge or property declaration edge + isQualifierEdge = True + if self.corrupted_statement_id == eID: + # Met a qualifier which associates with a corrupted statement + return + if label != "type" and node1 != self.ID: + # 1. not a property declaration edge and + # 2. the current qualifier's node1 is not the latest property edge id, throw errors. + if not self.ignore: + raise KGTKException( + "Node1 {} at line {} doesn't agree with latest property edge id {}.\n".format( + node1, line_number, self.ID + ) + ) + if label in self.labelSet: + success = self.genLabelTriple(node1, label, node2) + elif label in self.descriptionSet: + success= self.genDescriptionTriple(node1, label, node2) + elif label in self.aliasSet: + success = self.genAliasTriple(node1, label, node2) + elif label == "type": + # special edge of prop declaration + success = self.genPropDeclarationTriple(node1, label, node2) + else: + if label in self.propTypes: + success= self.genNormalTriple(node1, label, node2, isQualifierEdge) + else: + if not self.ignore: + raise KGTKException( + "property {}'s type is unknown at line {}.\n".format(label, line_number) + ) + success = False + if (not success) and (not isQualifierEdge) and (not self.ignore): + # We have a corrupted edge here. + self.ignoreFile.write("Corrupted statement at line number: {} with id {} with current corrupted id {}\n".format(line_number, eID, self.corrupted_statement_id)) + self.ignoreFile.flush() + self.corrupted_statement_id = eID + else: + self.read += 1 + self.corrupted_statement_id = None + + def serialize(self): + """ + Seriealize the triples. Used a hack to avoid serializing the prefix again. + """ + docs = self.etk.process_ems(self.doc) + self.fp.write("\n\n".join(docs[0].kg.serialize("ttl").split("\n\n")[1:])) + self.fp.flush() + self.__reset() + + def __serialize_prefix(self): + """ + This function should be called only once after the doc object is initialized. 
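The "hack" named in serialize's docstring relies on the Turtle layout rdflib produces: the @prefix block comes first, separated from the triples by a blank line. A minimal sketch, assuming that layout holds:

    ttl = "@prefix wd: <http://www.wikidata.org/entity/> .\n\nwd:Q42 a wd:Q5 .\n"

    prefix_block = ttl.split("\n\n")[0]             # what __serialize_prefix keeps
    body_only = "\n\n".join(ttl.split("\n\n")[1:])  # what serialize keeps

    print(prefix_block)  # @prefix wd: <http://www.wikidata.org/entity/> .
    print(body_only)     # wd:Q42 a wd:Q5 .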
+ """ + docs = self.etk.process_ems(self.doc) + self.fp.write(docs[0].kg.serialize("ttl").split("\n\n")[0] + "\n\n") + self.fp.flush() + self.__reset() + + def __reset(self): + self.ID = None + self.STATEMENT = None + self.read = 0 + self.doc = self.__setDoc() + + def finalize(self): + self.serialize() + + @staticmethod + def replaceIllegalString(s:str)->str: + return s.replace(":","-") \ No newline at end of file From f8d61c0e7255c2419da0c8f9323ae0eff249177a Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Mon, 27 Apr 2020 19:43:41 -0700 Subject: [PATCH 011/278] comply with PEP8 naming conventions, rename some varialbes --- kgtk/cli/generate_wikidata_triples.py | 16 +- kgtk/triple_generator.py | 260 ++++++++++++-------------- 2 files changed, 129 insertions(+), 147 deletions(-) diff --git a/kgtk/cli/generate_wikidata_triples.py b/kgtk/cli/generate_wikidata_triples.py index 880f92e3b..ee17e77f2 100644 --- a/kgtk/cli/generate_wikidata_triples.py +++ b/kgtk/cli/generate_wikidata_triples.py @@ -31,7 +31,7 @@ def add_arguments(parser): Parse arguments Args: parser (argparse.ArgumentParser) - propFile: str, labelSet: str, aliasSet: str, descriptionSet: str, n: str, dest: Any --output-n-lines --generate-truthy + prop_file: str, labelSet: str, aliasSet: str, descriptionSet: str, n: str, dest: Any --output-n-lines --generate-truthy """ parser.add_argument( "-lp", @@ -63,7 +63,7 @@ def add_arguments(parser): action="store", type=str, help="path to the file which contains the property datatype mapping in kgtk format.", - dest="propFile", + dest="prop_file", ) parser.add_argument( "-n", @@ -99,7 +99,7 @@ def run( labels: str, aliases: str, descriptions: str, - propFile: str, + prop_file: str, n: int, truthy: bool, ignore: bool, @@ -109,10 +109,10 @@ def run( from kgtk.triple_generator import TripleGenerator import sys generator = TripleGenerator( - propFile=propFile, - labelSet=labels, - aliasSet=aliases, - descriptionSet=descriptions, + prop_file=prop_file, + label_set=labels, + alias_set=aliases, + description_set=descriptions, n=n, ignore=ignore, truthy=truthy @@ -127,6 +127,6 @@ def run( num_line += 1 continue else: - generator.entryPoint(num_line, edge) + generator.entry_point(num_line, edge) num_line += 1 generator.finalize() diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py index 9d86ae35d..7dafe800f 100644 --- a/kgtk/triple_generator.py +++ b/kgtk/triple_generator.py @@ -3,6 +3,17 @@ from typing import TextIO from kgtk.exceptions import KGTKException from etk.wikidata.entity import WDItem, WDProperty +from etk.wikidata.value import ( +Precision, +Item, +StringValue, +TimeValue, +QuantityValue, +MonolingualText, +GlobeCoordinate, +ExternalIdentifier, +URLValue +) class TripleGenerator: """ @@ -10,13 +21,13 @@ class TripleGenerator: """ def __init__( self, - propFile: str, - labelSet: str, - aliasSet: str, - descriptionSet: str, + prop_file: str, + label_set: str, + alias_set: str, + description_set: str, ignore: bool, n: int, - destFp: TextIO = sys.stdout, + dest_fp: TextIO = sys.stdout, truthy:bool =False ): @@ -26,16 +37,16 @@ def __init__( from etk.knowledge_graph import KGSchema from etk.etk_module import ETKModule self.ignore = ignore - self.propTypes = self.__setPropTypes(propFile) - self.labelSet, self.aliasSet, self.descriptionSet = self.__setSets( - labelSet, aliasSet, descriptionSet + self.prop_types = self.set_properties(prop_file) + self.label_set, self.alias_set, self.description_set = self.__setSets( + label_set, alias_set, description_set ) - self.fp = destFp + 
self.fp = dest_fp self.n = int(n) - self.read = 0 + self.read_num_of_lines = 0 # ignore-logging, if not ignore, log them and move on. if not self.ignore: - self.ignoreFile = open("ignored.log","w") + self.ignore_file = open("ignored.log","w") # corrupted statement id self.corrupted_statement_id = None # truthy @@ -51,25 +62,15 @@ def _node_2_entity(self, node:str): ''' A node can be Qxxx or Pxxx, return the proper entity. ''' - if node in self.propTypes: - entity = WDProperty(node, self.propTypes[node]) + if node in self.prop_types: + entity = WDProperty(node, self.prop_types[node]) else: entity = WDItem(TripleGenerator.replaceIllegalString(node.upper())) return entity - def __setPropTypes(self, propFile: str): - from etk.wikidata.value import ( - Item, - StringValue, - TimeValue, - QuantityValue, - MonolingualText, - GlobeCoordinate, - ExternalIdentifier, - URLValue - ) - dataTypeMappings = { + def set_properties(self, prop_file: str): + datatype_mapping = { "item": Item, "time": TimeValue, "globe-coordinate": GlobeCoordinate, @@ -79,13 +80,13 @@ def __setPropTypes(self, propFile: str): "external-identifier":ExternalIdentifier, "url":URLValue } - with open(propFile, "r") as fp: + with open(prop_file, "r") as fp: props = fp.readlines() - __propTypes = {} + prop_types = {} for line in props[1:]: node1, _, node2 = line.split("\t") try: - __propTypes[node1] = dataTypeMappings[node2.strip()] + prop_types[node1] = datatype_mapping[node2.strip()] except: if not self.ignore: raise KGTKException( @@ -93,13 +94,13 @@ def __setPropTypes(self, propFile: str): node2, node1 ) ) - return __propTypes + return prop_types - def __setSets(self, labelSet: str, aliasSet: str, descriptionSet: str): + def __setSets(self, label_set: str, alias_set: str, description_set: str): return ( - set(labelSet.split(",")), - set(aliasSet.split(",")), - set(descriptionSet.split(",")), + set(label_set.split(",")), + set(alias_set.split(",")), + set(description_set.split(",")), ) def __setDoc(self, doc_id: str = "http://isi.edu/default-ns/projects"): @@ -138,80 +139,61 @@ def __setDoc(self, doc_id: str = "http://isi.edu/default-ns/projects"): return doc @staticmethod - def _process_text_string(string:str)->[str,str]: + def process_text_string(string:str)->[str,str]: ''' ''' if "@" in string: res = string.split("@") - textString = "@".join(res[:-1]).replace('"', "").replace("'", "") + text_string = "@".join(res[:-1]).replace('"', "").replace("'", "") lang = res[-1].replace('"','').replace("'","") if len(lang) != 2: lang = "en" else: - textString = string.replace('"', "").replace("'", "") + text_string = string.replace('"', "").replace("'", "") lang = "en" - return [textString, lang] + return [text_string, lang] - def genLabelTriple(self, node1: str, label: str, node2: str) -> bool: + def generate_label_triple(self, node1: str, label: str, node2: str) -> bool: entity = self._node_2_entity(node1) - textString, lang = TripleGenerator._process_text_string(node2) - entity.add_label(textString, lang=lang) + text_string, lang = TripleGenerator.process_text_string(node2) + entity.add_label(text_string, lang=lang) self.doc.kg.add_subject(entity) return True - def genDescriptionTriple(self, node1: str, label: str, node2: str) -> bool: + def generate_description_triple(self, node1: str, label: str, node2: str) -> bool: entity = self._node_2_entity(node1) - textString, lang = TripleGenerator._process_text_string(node2) - entity.add_description(textString, lang=lang) + text_string, lang = TripleGenerator.process_text_string(node2) + 
entity.add_description(text_string, lang=lang) self.doc.kg.add_subject(entity) return True - def genDescriptionTriple(self, node1: str, label: str, node2: str) -> bool: + def generate_alias_triple(self, node1: str, label: str, node2: str) -> bool: entity = self._node_2_entity(node1) - textString, lang = TripleGenerator._process_text_string(node2) - entity.add_description(textString, lang=lang) + text_string, lang = TripleGenerator.process_text_string(node2) + entity.add_alias(text_string, lang=lang) self.doc.kg.add_subject(entity) return True - def genAliasTriple(self, node1: str, label: str, node2: str) -> bool: - entity = self._node_2_entity(node1) - textString, lang = TripleGenerator._process_text_string(node2) - entity.add_alias(textString, lang=lang) - self.doc.kg.add_subject(entity) - return True - - def genPropDeclarationTriple(self, node1: str, label: str, node2: str) -> bool: - prop = WDProperty(node1, self.propTypes[node1]) + def generate_prop_declaration_triple(self, node1: str, label: str, node2: str) -> bool: + prop = WDProperty(node1, self.prop_types[node1]) self.doc.kg.add_subject(prop) return True - def genNormalTriple( - self, node1: str, label: str, node2: str, isQualifierEdge: bool) -> bool: - from etk.wikidata.value import ( - Item, - StringValue, - TimeValue, - QuantityValue, - MonolingualText, - GlobeCoordinate, - ExternalIdentifier, - URLValue, - Precision - ) - + def generate_normal_triple( + self, node1: str, label: str, node2: str, is_qualifier_edge: bool) -> bool: entity = self._node_2_entity(node1) # determine the edge type - edgeType = self.propTypes[label] - if edgeType == Item: - OBJECT = WDItem(TripleGenerator.replaceIllegalString(node2.upper())) - elif edgeType == TimeValue: + edge_type = self.prop_types[label] + if edge_type == Item: + object = WDItem(TripleGenerator.replaceIllegalString(node2.upper())) + elif edge_type == TimeValue: # https://www.wikidata.org/wiki/Help:Dates # ^2013-01-01T00:00:00Z/11 # ^8000000-00-00T00:00:00Z/3 if re.compile("[0-9]{4}").match(node2): try: dateTimeString = node2 + "-01-01" - OBJECT = TimeValue( + object = TimeValue( value=dateTimeString, #TODO calendar=Item("Q1985727"), precision=Precision.year, @@ -228,7 +210,7 @@ def genNormalTriple( dateTimeString = "-01-01".join(dateTimeString.split("-00-00")) elif dateTimeString[8:10] == "00": dateTimeString = dateTimeString[:8]+"01" + dateTimeString[10:] - OBJECT = TimeValue( + object = TimeValue( value=dateTimeString, calendar=Item("Q1985727"), precision=precision, @@ -240,65 +222,65 @@ def genNormalTriple( #TODO other than that, not supported. Creation of normal triple fails - elif edgeType == GlobeCoordinate: + elif edge_type == GlobeCoordinate: latitude, longitude = node2[1:].split("/") - OBJECT = GlobeCoordinate( + object = GlobeCoordinate( latitude, longitude, 0.0001, globe=StringValue("Earth") ) - elif edgeType == QuantityValue: + elif edge_type == QuantityValue: # +70[+60,+80]Q743895 res = re.compile("([\+|\-]?[0-9]+\.?[0-9]*)(?:\[([\+|\-]?[0-9]+\.?[0-9]*),([\+|\-]?[0-9]+\.?[0-9]*)\])?([U|Q](?:[0-9]+))?").match(node2).groups() amount, lower_bound, upper_bound, unit = res # Handle extra small numbers for now. 
TODO - if TripleGenerator._is_invalid_decimal_string(amount) or TripleGenerator._is_invalid_decimal_string(lower_bound) or TripleGenerator._is_invalid_decimal_string(upper_bound): + if TripleGenerator.is_invalid_decimal_string(amount) or TripleGenerator.is_invalid_decimal_string(lower_bound) or TripleGenerator.is_invalid_decimal_string(upper_bound): return False - amount = TripleGenerator._clean_number_string(amount) - lower_bound = TripleGenerator._clean_number_string(lower_bound) - upper_bound = TripleGenerator._clean_number_string(upper_bound) + amount = TripleGenerator.clean_number_string(amount) + lower_bound = TripleGenerator.clean_number_string(lower_bound) + upper_bound = TripleGenerator.clean_number_string(upper_bound) if unit != None: if upper_bound != None and lower_bound != None: - OBJECT = QuantityValue(amount, unit=Item(unit),upper_bound=upper_bound,lower_bound=lower_bound) + object = QuantityValue(amount, unit=Item(unit),upper_bound=upper_bound,lower_bound=lower_bound) else: - OBJECT = QuantityValue(amount, unit=Item(unit)) + object = QuantityValue(amount, unit=Item(unit)) else: if upper_bound != None and lower_bound != None: - OBJECT = QuantityValue(amount, upper_bound=upper_bound,lower_bound=lower_bound) + object = QuantityValue(amount, upper_bound=upper_bound,lower_bound=lower_bound) else: - OBJECT = QuantityValue(amount) - elif edgeType == MonolingualText: - textString, lang = TripleGenerator._process_text_string(node2) - OBJECT = MonolingualText(textString, lang) - elif edgeType == ExternalIdentifier: - OBJECT = ExternalIdentifier(node2) - elif edgeType == URLValue: - OBJECT = URLValue(node2) + object = QuantityValue(amount) + elif edge_type == MonolingualText: + text_string, lang = TripleGenerator.process_text_string(node2) + object = MonolingualText(text_string, lang) + elif edge_type == ExternalIdentifier: + object = ExternalIdentifier(node2) + elif edge_type == URLValue: + object = URLValue(node2) else: # treat everything else as stringValue - OBJECT = StringValue(node2) - if isQualifierEdge: + object = StringValue(node2) + if is_qualifier_edge: # edge: e8 p9 ^2013-01-01T00:00:00Z/11 # create qualifier edge on previous STATEMENT and return the updated STATEMENT - if type(OBJECT) == WDItem: - self.doc.kg.add_subject(OBJECT) - self.STATEMENT.add_qualifier(label.upper(), OBJECT) - self.doc.kg.add_subject(self.STATEMENT) #TODO maybe can be positioned better for the edge cases. + if type(object) == WDItem: + self.doc.kg.add_subject(object) + self.to_append_statement.add_qualifier(label.upper(), object) + self.doc.kg.add_subject(self.to_append_statement) #TODO maybe can be positioned better for the edge cases. 
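A short demo of the two numeric helpers invoked above, assuming numpy is available: values whose magnitude is below the 0.0001 threshold are rejected, and format_float_positional(..., trim="-") rewrites the rest in plain positional notation with trailing zeros trimmed, which keeps the serialized quantities stable:

    from numpy import format_float_positional

    def is_too_small(num_string):
        return (num_string is not None
                and abs(float(num_string)) < 0.0001
                and float(num_string) != 0)

    print(is_too_small("0.00005"))                            # True -> edge is dropped
    print(format_float_positional(float("+70"), trim="-"))    # 70
    print(format_float_positional(float("1.2e2"), trim="-"))  # 120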
else: # edge: q1 p8 q2 e8 # create brand new property edge and replace STATEMENT - if type(OBJECT) == WDItem: - self.doc.kg.add_subject(OBJECT) + if type(object) == WDItem: + self.doc.kg.add_subject(object) if self.truthy: - self.STATEMENT = entity.add_truthy_statement(label.upper(), OBJECT) + self.to_append_statement = entity.add_truthy_statement(label.upper(), object) else: - self.STATEMENT = entity.add_statement(label.upper(), OBJECT) + self.to_append_statement = entity.add_statement(label.upper(), object) self.doc.kg.add_subject(entity) return True @staticmethod - def _is_invalid_decimal_string(num_string): + def is_invalid_decimal_string(num_string): ''' if a decimal string too small, return True TODO ''' @@ -310,81 +292,81 @@ def _is_invalid_decimal_string(num_string): return False @staticmethod - def _clean_number_string(num): + def clean_number_string(num): from numpy import format_float_positional if num == None: return None else: return format_float_positional(float(num),trim="-") - def entryPoint(self, line_number:int , edge: str): + def entry_point(self, line_number:int , edge: str): """ generates a list of two, the first element is the determination of the edge type using corresponding edge type the second element is a bool indicating whether this is a valid property edge or qualifier edge. Call corresponding downstream functions """ - edgeList = edge.strip().split("\t") - l = len(edgeList) + edge_list = edge.strip().split("\t") + l = len(edge_list) if l!=4: return - [node1, label, node2, eID] = edgeList - node1, label, node2, eID = node1.strip(),label.strip(),node2.strip(),eID.strip() + [node1, label, node2, e_id] = edge_list + node1, label, node2, e_id = node1.strip(),label.strip(),node2.strip(),e_id.strip() if line_number == 0: #TODO ignore header mode # by default a statement edge - isQualifierEdge = False - # print("#Debug Info: ",line_number, self.ID, eID, isQualifierEdge,self.STATEMENT) - self.ID = eID + is_qualifier_edge = False + # print("#Debug Info: ",line_number, self.to_append_statement_id, e_id, is_qualifier_edge,self.to_append_statement) + self.to_append_statement_id = e_id self.corrupted_statement_id = None else: - if node1 != self.ID: + if node1 != self.to_append_statement_id: # also a new statement edge - if self.read >= self.n: + if self.read_num_of_lines >= self.n: self.serialize() - isQualifierEdge = False - # print("#Debug Info: ",line_number, self.ID, node1, isQualifierEdge,self.STATEMENT) - self.ID= eID + is_qualifier_edge = False + # print("#Debug Info: ",line_number, self.to_append_statement_id, node1, is_qualifier_edge,self.to_append_statement) + self.to_append_statement_id= e_id self.corrupted_statement_id = None else: # qualifier edge or property declaration edge - isQualifierEdge = True - if self.corrupted_statement_id == eID: + is_qualifier_edge = True + if self.corrupted_statement_id == e_id: # Met a qualifier which associates with a corrupted statement return - if label != "type" and node1 != self.ID: + if label != "type" and node1 != self.to_append_statement_id: # 1. not a property declaration edge and # 2. the current qualifier's node1 is not the latest property edge id, throw errors. 
if not self.ignore: raise KGTKException( "Node1 {} at line {} doesn't agree with latest property edge id {}.\n".format( - node1, line_number, self.ID + node1, line_number, self.to_append_statement_id ) ) - if label in self.labelSet: - success = self.genLabelTriple(node1, label, node2) - elif label in self.descriptionSet: - success= self.genDescriptionTriple(node1, label, node2) - elif label in self.aliasSet: - success = self.genAliasTriple(node1, label, node2) + if label in self.label_set: + success = self.generate_label_triple(node1, label, node2) + elif label in self.description_set: + success= self.generate_description_triple(node1, label, node2) + elif label in self.alias_set: + success = self.generate_alias_triple(node1, label, node2) elif label == "type": # special edge of prop declaration - success = self.genPropDeclarationTriple(node1, label, node2) + success = self.generate_prop_declaration_triple(node1, label, node2) else: - if label in self.propTypes: - success= self.genNormalTriple(node1, label, node2, isQualifierEdge) + if label in self.prop_types: + success= self.generate_normal_triple(node1, label, node2, is_qualifier_edge) else: if not self.ignore: raise KGTKException( "property {}'s type is unknown at line {}.\n".format(label, line_number) ) success = False - if (not success) and (not isQualifierEdge) and (not self.ignore): + if (not success) and (not is_qualifier_edge) and (not self.ignore): # We have a corrupted edge here. - self.ignoreFile.write("Corrupted statement at line number: {} with id {} with current corrupted id {}\n".format(line_number, eID, self.corrupted_statement_id)) - self.ignoreFile.flush() - self.corrupted_statement_id = eID + self.ignore_file.write("Corrupted statement at line number: {} with id {} with current corrupted id {}\n".format(line_number, e_id, self.corrupted_statement_id)) + self.ignore_file.flush() + self.corrupted_statement_id = e_id else: - self.read += 1 + self.read_num_of_lines += 1 self.corrupted_statement_id = None def serialize(self): @@ -406,9 +388,9 @@ def __serialize_prefix(self): self.__reset() def __reset(self): - self.ID = None - self.STATEMENT = None - self.read = 0 + self.to_append_statement_id = None + self.to_append_statement = None + self.read_num_of_lines = 0 self.doc = self.__setDoc() def finalize(self): From f7fa7e9a0c81c7c0bd025186c3c8d2f52aa1bbac Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Mon, 27 Apr 2020 22:38:17 -0700 Subject: [PATCH 012/278] fix the bug of missing prefix --- kgtk/triple_generator.py | 119 ++++++++++++++++----------------------- 1 file changed, 48 insertions(+), 71 deletions(-) diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py index 7dafe800f..ec328c16d 100644 --- a/kgtk/triple_generator.py +++ b/kgtk/triple_generator.py @@ -3,6 +3,10 @@ from typing import TextIO from kgtk.exceptions import KGTKException from etk.wikidata.entity import WDItem, WDProperty +from etk.etk_module import ETKModule +from etk.etk import ETK +from etk.knowledge_graph import KGSchema +from etk.wikidata import wiki_namespaces from etk.wikidata.value import ( Precision, Item, @@ -15,6 +19,7 @@ URLValue ) + class TripleGenerator: """ A class to maintain the status of the generator @@ -30,15 +35,10 @@ def __init__( dest_fp: TextIO = sys.stdout, truthy:bool =False ): - - import logging from etk.wikidata.statement import Rank - from etk.etk import ETK - from etk.knowledge_graph import KGSchema - from etk.etk_module import ETKModule self.ignore = ignore self.prop_types = self.set_properties(prop_file) - 
self.label_set, self.alias_set, self.description_set = self.__setSets( + self.label_set, self.alias_set, self.description_set = self.set_sets( label_set, alias_set, description_set ) self.fp = dest_fp @@ -50,13 +50,9 @@ def __init__( # corrupted statement id self.corrupted_statement_id = None # truthy - self.truthy = truthy - # serialize prfix - kg_schema = KGSchema() - kg_schema.add_schema("@prefix : .", "ttl") - self.etk = ETK(kg_schema=kg_schema, modules=ETKModule) - self.doc = self.__setDoc() - self.__serialize_prefix() + self.truthy = truthy + self.reset_etk_doc() + self.serialize_prefix() def _node_2_entity(self, node:str): ''' @@ -96,47 +92,54 @@ def set_properties(self, prop_file: str): ) return prop_types - def __setSets(self, label_set: str, alias_set: str, description_set: str): + def set_sets(self, label_set: str, alias_set: str, description_set: str): return ( set(label_set.split(",")), set(alias_set.split(",")), set(description_set.split(",")), ) - def __setDoc(self, doc_id: str = "http://isi.edu/default-ns/projects"): + def reset_etk_doc(self, doc_id: str = "http://isi.edu/default-ns/projects"): """ reset the doc object and return it. Called at initialization and after outputting triples. """ - doc = self.etk.create_document({}, doc_id=doc_id) - # bind prefixes - doc.kg.bind("wikibase", "http://wikiba.se/ontology#") - doc.kg.bind("wd", "http://www.wikidata.org/entity/") - doc.kg.bind("wdt", "http://www.wikidata.org/prop/direct/") - doc.kg.bind("wdtn", "http://www.wikidata.org/prop/direct-normalized/") - doc.kg.bind("wdno", "http://www.wikidata.org/prop/novalue/") - doc.kg.bind("wds", "http://www.wikidata.org/entity/statement/") - doc.kg.bind("wdv", "http://www.wikidata.org/value/") - doc.kg.bind("wdref", "http://www.wikidata.org/reference/") - doc.kg.bind("p", "http://www.wikidata.org/prop/") - doc.kg.bind("pr", "http://www.wikidata.org/prop/reference/") - doc.kg.bind("prv", "http://www.wikidata.org/prop/reference/value/") - doc.kg.bind( - "prn", "http://www.wikidata.org/prop/reference/value-normalized/" - ) - doc.kg.bind("ps", "http://www.wikidata.org/prop/statement/") - doc.kg.bind("psv", "http://www.wikidata.org/prop/statement/value/") - doc.kg.bind( - "psn", "http://www.wikidata.org/prop/statement/value-normalized/" - ) - doc.kg.bind("pq", "http://www.wikidata.org/prop/qualifier/") - doc.kg.bind("pqv", "http://www.wikidata.org/prop/qualifier/value/") - doc.kg.bind( - "pqn", "http://www.wikidata.org/prop/qualifier/value-normalized/" - ) - doc.kg.bind("skos", "http://www.w3.org/2004/02/skos/core#") - doc.kg.bind("prov", "http://www.w3.org/ns/prov#") - doc.kg.bind("schema", "http://schema.org/") - return doc + kg_schema = KGSchema() + kg_schema.add_schema("@prefix : .", "ttl") + self.etk = ETK(kg_schema=kg_schema, modules=ETKModule) + self.doc = self.etk.create_document({}, doc_id=doc_id) + for k, v in wiki_namespaces.items(): + self.doc.kg.bind(k, v) + + def serialize(self): + """ + Seriealize the triples. Used a hack to avoid serializing the prefix again. + """ + docs = self.etk.process_ems(self.doc) + self.fp.write("\n\n".join(docs[0].kg.serialize("ttl").split("\n\n")[1:])) + self.fp.flush() + self.reset() + + def serialize_prefix(self): + """ + This function should be called only once after the doc object is initialized. 
In order to serialize the prefix at the very beginning, it has to be printed manually, per the change of rdflib 4.2.2->5.0.0.
+        Relevant issue: https://github.com/RDFLib/rdflib/issues/965
+        """
+        for k, v in wiki_namespaces.items():
+            line = "@prefix " + k + " " + v + " .\n"
+            self.fp.write(line)
+        self.fp.write("\n")
+        self.fp.flush()
+        self.reset()
+
+    def reset(self):
+        self.to_append_statement_id = None
+        self.to_append_statement = None
+        self.read_num_of_lines = 0
+        self.reset_etk_doc()
+
+    def finalize(self):
+        self.serialize()
 
     @staticmethod
     def process_text_string(string:str)->[str,str]:
         '''
@@ -369,32 +372,6 @@ def entry_point(self, line_number:int , edge: str):
             self.read_num_of_lines += 1
             self.corrupted_statement_id = None
 
-    def serialize(self):
-        """
-        Serialize the triples. Used a hack to avoid serializing the prefix again.
-        """
-        docs = self.etk.process_ems(self.doc)
-        self.fp.write("\n\n".join(docs[0].kg.serialize("ttl").split("\n\n")[1:]))
-        self.fp.flush()
-        self.__reset()
-
-    def __serialize_prefix(self):
-        """
-        This function should be called only once after the doc object is initialized.
-        """
-        docs = self.etk.process_ems(self.doc)
-        self.fp.write(docs[0].kg.serialize("ttl").split("\n\n")[0] + "\n\n")
-        self.fp.flush()
-        self.__reset()
-
-    def __reset(self):
-        self.to_append_statement_id = None
-        self.to_append_statement = None
-        self.read_num_of_lines = 0
-        self.doc = self.__setDoc()
-
-    def finalize(self):
-        self.serialize()
 
     @staticmethod
     def replaceIllegalString(s:str)->str:
         return s.replace(":","-")

From b6314a53a999490fa8111da6800651913fe4d2ae Mon Sep 17 00:00:00 2001
From: Rongpeng
Date: Mon, 27 Apr 2020 22:44:54 -0700
Subject: [PATCH 013/278] added the gzip file support with '-gz yes' option

---
 kgtk/cli/generate_wikidata_triples.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/kgtk/cli/generate_wikidata_triples.py b/kgtk/cli/generate_wikidata_triples.py
index ee17e77f2..bafa0cb99 100644
--- a/kgtk/cli/generate_wikidata_triples.py
+++ b/kgtk/cli/generate_wikidata_triples.py
@@ -83,12 +83,20 @@ def add_arguments(parser):
     )
     parser.add_argument(
         "-ig",
-        "--ig",
+        "--ignore",
         action="store",
         type=str2bool,
         help="if set to yes, ignore various kinds of exceptions and mistakes and log them to a log file with line number in input file, rather than stopping. 
logging", dest="ignore", ) + parser.add_argument( + "-gz", + "--use-gz", + action="store", + type=str2bool, + help="if set to yes, read from compressed gz file", + dest="use_gz", + ) # logging level # parser.add_argument('-l', '--logging-level', action='store', dest='logging_level', # default="info", choices=("error", "warning", "info", "debug"), @@ -103,9 +111,10 @@ def run( n: int, truthy: bool, ignore: bool, - # logging_level:str + use_gz: bool ): # import modules locally + import gzip from kgtk.triple_generator import TripleGenerator import sys generator = TripleGenerator( @@ -119,8 +128,12 @@ def run( ) # process stdin num_line = 0 + if use_gz: + fp = gzip.open(sys.stdin.buffer, 'rt') + else: + fp = sys.stdin while True: - edge = sys.stdin.readline() + edge = fp.readline() if not edge: break if edge.startswith("#") or num_line == 0: # TODO First line omit From 69ffc1c703f57ab91a21bd13a0eb34ad09eaeed3 Mon Sep 17 00:00:00 2001 From: ckxz105 Date: Tue, 28 Apr 2020 09:48:45 -0700 Subject: [PATCH 014/278] code clean --- kgtk/cli/text_embedding.py | 84 +++++++++++++------------------------- 1 file changed, 28 insertions(+), 56 deletions(-) diff --git a/kgtk/cli/text_embedding.py b/kgtk/cli/text_embedding.py index 05404a54d..3a439c8ce 100644 --- a/kgtk/cli/text_embedding.py +++ b/kgtk/cli/text_embedding.py @@ -1,5 +1,6 @@ import sys import typing +from kgtk.exceptions import KGTKException ALL_EMBEDDING_MODELS_NAMES = [ "bert-base-nli-cls-token", @@ -41,7 +42,9 @@ def __init__(self, model_name=None, query_server=None, cache_config:dict={}): # self.model = SentenceTransformer(modules=[word_embedding_model, pooling_model]) else: self.model_name = model_name - self.model = SentenceTransformer(model_name) + self._logger.info("Using model {}".format(self.model_name)) + self.model = SentenceTransformer(self.model_name) + # setup redis cache server if query_server is None or query_server == "": self.wikidata_server = "https://query.wikidata.org/sparql" else: @@ -135,7 +138,7 @@ def send_sparql_query(self, query_body:str): results = qm.query().convert()['results']['bindings'] return results except: - raise ValueError("Sending Sparl query to {} failed!".format(self.wikidata_server)) + raise KGTKException("Sending Sparl query to {} failed!".format(self.wikidata_server)) def get_item_description(self, qnodes: typing.List[str]=None, target_properties:dict={}, gt_label:str=""): """ @@ -147,7 +150,6 @@ def get_item_description(self, qnodes: typing.List[str]=None, target_properties: find_all_properties = True else: find_all_properties = False - # self._logger.error(str(qnodes)) properties_list = [[] for _ in range(4)] used_p_node_ids = set() names = ["labels", "descriptions", "isa_properties", "has_properties"] @@ -166,8 +168,7 @@ def get_item_description(self, qnodes: typing.List[str]=None, target_properties: for each_node in qnodes: cache_res = self.redis_server.get(each_node+str(properties_list)) if cache_res is not None: - sentences_cache_dict[each_node] = cache_res - # self._logger.error("{} hit!".format(each_node+str(properties_list))) + sentences_cache_dict[each_node] = cache_res.decode("utf-8") if len(sentences_cache_dict) > 0: qnodes = set(qnodes) - set(sentences_cache_dict.keys()) @@ -267,7 +268,6 @@ def get_item_description(self, qnodes: typing.List[str]=None, target_properties: each_sentence = self.attribute_to_sentence(self.candidates[each_node_id], each_node_id) self.candidates[each_node_id]["sentence"] = each_sentence if self.redis_server is not None: - # self._logger.error("Pushed: 
{}".format(each_node+str(properties_list))) self.redis_server.set(each_node+str(properties_list), each_sentence) for each_node_id, sentence in sentences_cache_dict.items(): @@ -285,6 +285,7 @@ def read_input(self, file_path: str, skip_nodes_set: set=None, import pandas as pd # type: ignore import numpy as np import math + self.property_labels_dict = property_labels_dict if input_format == "test_format": @@ -298,7 +299,7 @@ def read_input(self, file_path: str, skip_nodes_set: set=None, elif "kg_id" in input_df.columns: gt_column_id = "kg_id" else: - raise ValueError("Can't find ground truth id column! It should either named as `GT_kg_id` or `kg_id`") + raise KGTKException("Can't find ground truth id column! It should either named as `GT_kg_id` or `kg_id`") for _, each in input_df.iterrows(): if isinstance(each["candidates"], str): @@ -318,8 +319,8 @@ def read_input(self, file_path: str, skip_nodes_set: set=None, if label == "": self._logger.error("Skip a row with no label given: as {}".format(str(each))) continue - # candidates[each['label']] = temp temp.extend(gt_nodes) + for each_q in temp: self.q_node_to_label[each_q] = label if skip_nodes_set is not None and each_q in skip_nodes_set: @@ -344,7 +345,7 @@ def read_input(self, file_path: str, skip_nodes_set: set=None, # get header headers = f.readline().replace("\n", "").split("\t") if len(headers) < 3: - raise ValueError("No enough columns found on given input file. Only {} columns given but at least 3 needed.".format(len(headers))) + raise KGTKException("No enough columns found on given input file. Only {} columns given but at least 3 needed.".format(len(headers))) elif "node" in headers and "property" in headers and "value" in headers: column_references = {"node": headers.index("node"), "property": headers.index("property"), @@ -355,7 +356,7 @@ def read_input(self, file_path: str, skip_nodes_set: set=None, "value": 2} else: missing_column = set(["node", "property", "value"]) - set(headers) - raise ValueError("Missing column {}".format(missing_column)) + raise KGTKException("Missing column {}".format(missing_column)) self._logger.debug("column index information: ") self._logger.debug(str(column_references)) # read contents @@ -370,7 +371,7 @@ def read_input(self, file_path: str, skip_nodes_set: set=None, if "@" in node_value and node_value[0] != "@": node_value_org = node_value node_value = node_value[:node_value.index("@")] - # print("{} --> {}".format(node_value_org, node_value)) + # remove extra double quote " and single quote ' if node_value[0]== '"' and node_value[-1] == '"': node_value = node_value[1:-1] @@ -397,7 +398,7 @@ def read_input(self, file_path: str, skip_nodes_set: set=None, each_node_attributes["has_properties"].append(node_value) else: - raise ValueError("Unkonwn input format {}".format(input_format)) + raise KGTKException("Unkonwn input format {}".format(input_format)) self._logger.info("Totally {} Q nodes loaded.".format(len(self.candidates))) self.vector_dump_file = "dump_vectors_{}_{}.pkl".format(file_path[:file_path.rfind(".")], self. 
model_name) @@ -543,12 +544,6 @@ def plot_result(self, use_cache=True, vector_dump_file=None, import time from sklearn.manifold import TSNE # type: ignore - # if vector_dump_file is None: - # vector_dump_file = self.vector_dump_file.replace(".pkl", "_2D.pkl") - # if use_cache and os.path.exists(vector_dump_file): - # self._logger.info("Using cached 2D vector file!") - # self.load_vectors(vector_dump_file, "2D") - # else: self.vectors_map = {k: v for k, v in sorted(self.vectors_map.items(), key=lambda item: item[0], reverse=True)} vectors = list(self.vectors_map.values()) # use tsne to reduce dimension @@ -560,7 +555,6 @@ def plot_result(self, use_cache=True, vector_dump_file=None, self._logger.info("Totally used {} seconds.".format(time.time() - start)) if input_format == "test_format": - # # start plot gt_indexes = set() vector_map_keys = list(self.vectors_map.keys()) for each_node in self.gt_nodes: @@ -615,8 +609,6 @@ def evaluate_result(self): else: points = self.gt_indexes for i, each in enumerate(self.vectors_map.keys()): - # label = self.q_node_to_label[each] - # description = self.qnodes_descriptions.get(each, "") if i in points: if centroid is None: centroid = np.array(self.vectors_map[each]) @@ -633,25 +625,13 @@ def evaluate_result(self): @staticmethod def calculate_distance(a, b): if len(a) != len(b): - raise ValueError("Vector dimension are different!") + raise KGTKException("Vector dimension are different!") dist = 0 for v1, v2 in zip(a,b): dist += (v1 - v2) **2 dist = dist ** 0.5 return dist -# removed -# def load_embedding_model_names(): -# names = [] -# import os -# model_file_path = os.path.join(repr(__file__).replace("'","").replace("/text_embedding.py", ""), "all_embedding_models_names.txt") -# if os.path.exists(model_file_path): -# with open(model_file_path, "r") as f: -# for each_line in f.readlines(): -# names.append(each_line.replace("\n", "")) -# else: -# raise ValueError("Embedding model names list file lost! Please check.") -# return names def load_property_labels_file(input_files: typing.List[str]): labels_dict = {} @@ -663,7 +643,7 @@ def load_property_labels_file(input_files: typing.List[str]): if headers is None: headers = each_line if len(headers) < 2: - raise ValueError("No enough columns found on given input file. Only {} columns given but at least 2 needed.".format(len(headers))) + raise KGTKException("No enough columns found on given input file. 
Only {} columns given but at least 2 needed.".format(len(headers))) elif "predicate" in headers and "label" in headers: column_references = {"predicate": headers.index("predicate"), "label": headers.index("label")} @@ -672,7 +652,7 @@ def load_property_labels_file(input_files: typing.List[str]): "label": headers.index("label"), } else: - raise ValueError("Can't determine which column is label column for label file!") + raise KGTKException("Can't determine which column is label column for label file!") else: node_id = each_line[column_references["predicate"]] @@ -730,18 +710,21 @@ def load_black_list_files(file_path): def main(**kwargs): - # setup logger format - # console = logging.StreamHandler() - # console.setLevel(logging.DEBUG) - # formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(name)s %(lineno)d -- %(message)s", '%m-%d %H:%M:%S') - # console.setFormatter(formatter) - # logging.getLogger('').addHandler(console) from kgtk.exceptions import KGTKException try: import logging import os import time from time import strftime + import torch + import typing + import pandas as pd + import string + import math + import re + import argparse + import pickle + logging_level = kwargs.get("logging_level", "warning") if logging_level == "info": logging_level_class = logging.INFO @@ -753,7 +736,6 @@ def main(**kwargs): logging_level_class = logging.ERROR else: logging_level_class = logging.WARNING - if logging_level != "none": logger_path = os.path.join(os.environ.get("HOME"), "kgtk_text_embedding_log_{}.log".format(strftime("%Y-%m-%d-%H-%M"))) logging.basicConfig(level=logging_level_class, @@ -763,15 +745,6 @@ def main(**kwargs): filemode='w') _logger = logging.getLogger(__name__) _logger.warning("Running with logging level {}".format(_logger.getEffectiveLevel())) - import torch - import typing - - import pandas as pd - import string - import math - import re - import argparse - import pickle # get input parameters from kwargs output_uri = kwargs.get("output_uri", "") @@ -797,7 +770,6 @@ def main(**kwargs): for each_property, each_input in zip(all_required_properties, all_property_relate_inputs): for each in each_input: properties[each] = each_property - output_properties = { "metatada_properties": kwargs.get("metatada_properties", []), @@ -809,9 +781,9 @@ def main(**kwargs): if isinstance(input_uris, str): input_uris = [input_uris] if len(all_models_names) == 0: - raise ValueError("No embedding vector model name given!") + raise KGTKException("No embedding vector model name given!") if len(input_uris) == 0: - raise ValueError("No input file path given!") + raise KGTKException("No input file path given!") if output_uri == "": output_uri = os.getenv("HOME") # os.getcwd() @@ -925,7 +897,7 @@ def str2bool(v): # query server parser.add_argument("--query-server", nargs='?', action='store', default="", dest="query_server", - help="cache host address, default is https://query.wikidata.org/sparql" + help="sparql query endpoint used for test_format input files, default is https://query.wikidata.org/sparql" ) From a6572bd8b791365d24c20c8ff6b2edd1b6721f8c Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Tue, 28 Apr 2020 13:43:26 -0700 Subject: [PATCH 015/278] a one-time solution for uri validation error rooted in rdflib --- kgtk/triple_generator.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py index ec328c16d..12158ee81 100644 --- a/kgtk/triple_generator.py +++ b/kgtk/triple_generator.py @@ -7,6 +7,7 @@ from 
etk.etk import ETK from etk.knowledge_graph import KGSchema from etk.wikidata import wiki_namespaces +import rfc3986 from etk.wikidata.value import ( Precision, Item, @@ -74,7 +75,7 @@ def set_properties(self, prop_file: str): "monolingualtext": MonolingualText, "string": StringValue, "external-identifier":ExternalIdentifier, - "url":URLValue + "url":StringValue } with open(prop_file, "r") as fp: props = fp.readlines() @@ -258,7 +259,10 @@ def generate_normal_triple( elif edge_type == ExternalIdentifier: object = ExternalIdentifier(node2) elif edge_type == URLValue: - object = URLValue(node2) + if TripleGenerator.is_valid_uri_with_scheme_and_host(node2): + object = URLValue(node2) + else: + return False else: # treat everything else as stringValue object = StringValue(node2) @@ -294,6 +298,18 @@ def is_invalid_decimal_string(num_string): return True return False + @staticmethod + def is_valid_uri_with_scheme_and_host(uri:str): + ''' + https://github.com/python-hyper/rfc3986/issues/30#issuecomment-461661883 + ''' + try: + uri = rfc3986.URIReference.from_string(uri) + rfc3986.validators.Validator().require_presence_of("scheme", "host").check_validity_of("scheme", "host").validate(uri) + return True + except : + return False + @staticmethod def clean_number_string(num): from numpy import format_float_positional From f0250c9bca554175382a52488d057f94c2f8164b Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Tue, 28 Apr 2020 20:50:09 -0700 Subject: [PATCH 016/278] add language detection for label/description/string, etc --- kgtk/triple_generator.py | 32 ++++++++++++++++++++++++++++---- requirements.txt | 1 + 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py index 12158ee81..d958bf2c8 100644 --- a/kgtk/triple_generator.py +++ b/kgtk/triple_generator.py @@ -8,6 +8,7 @@ from etk.knowledge_graph import KGSchema from etk.wikidata import wiki_namespaces import rfc3986 +from langdetect import detect from etk.wikidata.value import ( Precision, Item, @@ -127,7 +128,7 @@ def serialize_prefix(self): Relevent issue: https://github.com/RDFLib/rdflib/issues/965 """ for k, v in wiki_namespaces.items(): - line = "@prefix " + k + " " + v + " .\n" + line = "@prefix " + k + ": <" + v + "> .\n" self.fp.write(line) self.fp.write("\n") self.fp.flush() @@ -145,16 +146,17 @@ def finalize(self): @staticmethod def process_text_string(string:str)->[str,str]: ''' + detect language ''' if "@" in string: res = string.split("@") text_string = "@".join(res[:-1]).replace('"', "").replace("'", "") lang = res[-1].replace('"','').replace("'","") if len(lang) != 2: - lang = "en" + lang = detect(text_string) else: text_string = string.replace('"', "").replace("'", "") - lang = "en" + lang = detect(text_string) return [text_string, lang] def generate_label_triple(self, node1: str, label: str, node2: str) -> bool: @@ -194,7 +196,18 @@ def generate_normal_triple( # https://www.wikidata.org/wiki/Help:Dates # ^2013-01-01T00:00:00Z/11 # ^8000000-00-00T00:00:00Z/3 - if re.compile("[0-9]{4}").match(node2): + if re.compile("[12]\d{3}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])").match(node2): + try: + dateTimeString = node2 + object = TimeValue( + value=dateTimeString, #TODO + calendar=Item("Q1985727"), + precision=Precision.year, + time_zone=0, + ) + except: + return False + elif re.compile("[12]\d{3}").match(node2): try: dateTimeString = node2 + "-01-01" object = TimeValue( @@ -205,6 +218,17 @@ def generate_normal_triple( ) except: return False + elif 
re.compile("[12]\d{3}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])").match(node2): + try: + dateTimeString = node2 + object = TimeValue( + value=dateTimeString, #TODO + calendar=Item("Q1985727"), + precision=Precision.year, + time_zone=0, + ) + except: + return False else: try: dateTimeString, precision = node2[1:].split("/") diff --git a/requirements.txt b/requirements.txt index 7cb0d1c7f..36e6ec5bf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,7 @@ sh sklearn SPARQLWrapper tqdm +langdetect rdflib==5.0.0 etk==2.2.1 simplejson From 137a9a35a7b14d7546143f8eb452919bc1d66a72 Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Tue, 28 Apr 2020 21:20:58 -0700 Subject: [PATCH 017/278] fix empty string language tag issue --- kgtk/cli/generate_wikidata_triples.py | 5 +++-- kgtk/triple_generator.py | 12 +++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/kgtk/cli/generate_wikidata_triples.py b/kgtk/cli/generate_wikidata_triples.py index bafa0cb99..4f0022000 100644 --- a/kgtk/cli/generate_wikidata_triples.py +++ b/kgtk/cli/generate_wikidata_triples.py @@ -127,7 +127,7 @@ def run( truthy=truthy ) # process stdin - num_line = 0 + num_line = 1 if use_gz: fp = gzip.open(sys.stdin.buffer, 'rt') else: @@ -136,10 +136,11 @@ def run( edge = fp.readline() if not edge: break - if edge.startswith("#") or num_line == 0: # TODO First line omit + if edge.startswith("#") or num_line == 1: # TODO First line omit num_line += 1 continue else: + print(num_line) generator.entry_point(num_line, edge) num_line += 1 generator.finalize() diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py index d958bf2c8..087cca112 100644 --- a/kgtk/triple_generator.py +++ b/kgtk/triple_generator.py @@ -76,7 +76,7 @@ def set_properties(self, prop_file: str): "monolingualtext": MonolingualText, "string": StringValue, "external-identifier":ExternalIdentifier, - "url":StringValue + "url":URLValue } with open(prop_file, "r") as fp: props = fp.readlines() @@ -148,12 +148,18 @@ def process_text_string(string:str)->[str,str]: ''' detect language ''' + if len(string)==0: + return ["","en"] if "@" in string: res = string.split("@") text_string = "@".join(res[:-1]).replace('"', "").replace("'", "") lang = res[-1].replace('"','').replace("'","") - if len(lang) != 2: - lang = detect(text_string) + try: + detected_lang = detect(text_string) + if detected_lang != lang: + lang = detected_lang + except: + lang = "en" else: text_string = string.replace('"', "").replace("'", "") lang = detect(text_string) From 5ef1e199415eba144b1bbb4f8c255ef267ec5d62 Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Wed, 29 Apr 2020 14:25:31 -0700 Subject: [PATCH 018/278] remove language detection in triple generator, pre-compile regex --- kgtk/cli/generate_wikidata_triples.py | 1 - kgtk/triple_generator.py | 39 +++++++++------------------ 2 files changed, 13 insertions(+), 27 deletions(-) diff --git a/kgtk/cli/generate_wikidata_triples.py b/kgtk/cli/generate_wikidata_triples.py index 4f0022000..4d57637b7 100644 --- a/kgtk/cli/generate_wikidata_triples.py +++ b/kgtk/cli/generate_wikidata_triples.py @@ -140,7 +140,6 @@ def run( num_line += 1 continue else: - print(num_line) generator.entry_point(num_line, edge) num_line += 1 generator.finalize() diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py index 087cca112..e947d4e96 100644 --- a/kgtk/triple_generator.py +++ b/kgtk/triple_generator.py @@ -8,7 +8,6 @@ from etk.knowledge_graph import KGSchema from etk.wikidata import wiki_namespaces import rfc3986 -from langdetect import 
detect from etk.wikidata.value import ( Precision, Item, @@ -55,6 +54,9 @@ def __init__( self.truthy = truthy self.reset_etk_doc() self.serialize_prefix() + self.yyyy_mm_dd_pattern = re.compile("[12]\d{3}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])") + self.yyyy_pattern = re.compile("[12]\d{3}") + self.quantity_pattern = re.compile("([\+|\-]?[0-9]+\.?[0-9]*)(?:\[([\+|\-]?[0-9]+\.?[0-9]*),([\+|\-]?[0-9]+\.?[0-9]*)\])?([U|Q](?:[0-9]+))?") def _node_2_entity(self, node:str): ''' @@ -63,7 +65,7 @@ def _node_2_entity(self, node:str): if node in self.prop_types: entity = WDProperty(node, self.prop_types[node]) else: - entity = WDItem(TripleGenerator.replaceIllegalString(node.upper())) + entity = WDItem(TripleGenerator.replace_illegal_string(node.upper())) return entity @@ -146,7 +148,7 @@ def finalize(self): @staticmethod def process_text_string(string:str)->[str,str]: ''' - detect language + Language detection is removed from triple generation. The user is responsible for detect the language ''' if len(string)==0: return ["","en"] @@ -154,15 +156,11 @@ def process_text_string(string:str)->[str,str]: res = string.split("@") text_string = "@".join(res[:-1]).replace('"', "").replace("'", "") lang = res[-1].replace('"','').replace("'","") - try: - detected_lang = detect(text_string) - if detected_lang != lang: - lang = detected_lang - except: - lang = "en" + if len(lang) > 2: + lang ="en" else: text_string = string.replace('"', "").replace("'", "") - lang = detect(text_string) + lang = "en" return [text_string, lang] def generate_label_triple(self, node1: str, label: str, node2: str) -> bool: @@ -197,12 +195,12 @@ def generate_normal_triple( # determine the edge type edge_type = self.prop_types[label] if edge_type == Item: - object = WDItem(TripleGenerator.replaceIllegalString(node2.upper())) + object = WDItem(TripleGenerator.replace_illegal_string(node2.upper())) elif edge_type == TimeValue: # https://www.wikidata.org/wiki/Help:Dates # ^2013-01-01T00:00:00Z/11 # ^8000000-00-00T00:00:00Z/3 - if re.compile("[12]\d{3}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])").match(node2): + if self.yyyy_mm_dd_pattern.match(node2): try: dateTimeString = node2 object = TimeValue( @@ -213,7 +211,7 @@ def generate_normal_triple( ) except: return False - elif re.compile("[12]\d{3}").match(node2): + elif self.yyyy_pattern.match(node2): try: dateTimeString = node2 + "-01-01" object = TimeValue( @@ -224,17 +222,6 @@ def generate_normal_triple( ) except: return False - elif re.compile("[12]\d{3}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])").match(node2): - try: - dateTimeString = node2 - object = TimeValue( - value=dateTimeString, #TODO - calendar=Item("Q1985727"), - precision=Precision.year, - time_zone=0, - ) - except: - return False else: try: dateTimeString, precision = node2[1:].split("/") @@ -264,7 +251,7 @@ def generate_normal_triple( elif edge_type == QuantityValue: # +70[+60,+80]Q743895 - res = re.compile("([\+|\-]?[0-9]+\.?[0-9]*)(?:\[([\+|\-]?[0-9]+\.?[0-9]*),([\+|\-]?[0-9]+\.?[0-9]*)\])?([U|Q](?:[0-9]+))?").match(node2).groups() + res = self.quantity_pattern.match(node2).groups() amount, lower_bound, upper_bound, unit = res # Handle extra small numbers for now. 
TODO @@ -420,5 +407,5 @@ def entry_point(self, line_number:int , edge: str): @staticmethod - def replaceIllegalString(s:str)->str: + def replace_illegal_string(s:str)->str: return s.replace(":","-") \ No newline at end of file From f4bb3925b872f7e6c0694aad9656d6d414ba8550 Mon Sep 17 00:00:00 2001 From: greatyyx Date: Wed, 29 Apr 2020 14:48:35 -0700 Subject: [PATCH 019/278] add test case example for dummy --- kgtk/tests/__init__.py | 0 kgtk/tests/test_cli_dummy.py | 26 ++++++++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 kgtk/tests/__init__.py create mode 100644 kgtk/tests/test_cli_dummy.py diff --git a/kgtk/tests/__init__.py b/kgtk/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/kgtk/tests/test_cli_dummy.py b/kgtk/tests/test_cli_dummy.py new file mode 100644 index 000000000..bd92fd6a6 --- /dev/null +++ b/kgtk/tests/test_cli_dummy.py @@ -0,0 +1,26 @@ +import unittest +from kgtk.cli_entry import cli_entry +from kgtk.cli.dummy import run +from kgtk.exceptions import KGTKException + + +class TestDummy(unittest.TestCase): + + def test_module(self): + # test separate module files + pass + + def test_run(self): + # test run function + # exceptions here are not trapped by KGTKExceptionHandler + with self.assertRaises(KGTKException): + run(name='kgtk', info=None, error=True, _debug=False) + + def test_cli(self): + # test command from cli entry + assert cli_entry('kgtk', 'dummy', 'normal_test') == 0 + assert cli_entry('kgtk', 'dummy', 'test_exception', '-e') != 0 + + +if __name__ == '__main__': + unittest.main() From 5c56ba30d84d8171bdbeccd57c3c44430eb8d642 Mon Sep 17 00:00:00 2001 From: greatyyx Date: Wed, 29 Apr 2020 15:00:10 -0700 Subject: [PATCH 020/278] rename test dummy --- kgtk/tests/{test_cli_dummy.py => test_dummy.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename kgtk/tests/{test_cli_dummy.py => test_dummy.py} (100%) diff --git a/kgtk/tests/test_cli_dummy.py b/kgtk/tests/test_dummy.py similarity index 100% rename from kgtk/tests/test_cli_dummy.py rename to kgtk/tests/test_dummy.py From 5ec943c7c057425bbc6149978cea858e65d88051 Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Wed, 29 Apr 2020 16:02:58 -0700 Subject: [PATCH 021/278] include more bad chars that Blazegraph doesn't accept in entity name; removed the upper() processing. It is the user's duty to verify that P000_author and P000_AUTHOR are two different properties. Processing here may cause trouble if the difference is intended. 
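
A minimal standalone sketch of the sanitization this patch applies, assuming
only that entity names arrive as plain strings; BAD_CHARS mirrors the list
added in the diff below, and the sample names are hypothetical:

    # Replace each character that Blazegraph rejects in entity names with "_".
    BAD_CHARS = [":", "-", "&", ",", " ",
                 "(", ")", "'", '"', "/", "\\", "[", "]", ";"]

    def replace_illegal_string(s: str) -> str:
        # Case is deliberately preserved, so P000_author and P000_AUTHOR
        # remain two different names.
        for char in BAD_CHARS:
            s = s.replace(char, "_")
        return s

    assert replace_illegal_string("Q42:draft (v2)") == "Q42_draft__v2_"
    assert replace_illegal_string("P000_author") == "P000_author"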
--- kgtk/triple_generator.py | 21 ++++++++++++++------- requirements.txt | 1 - 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py index e947d4e96..ae5316abe 100644 --- a/kgtk/triple_generator.py +++ b/kgtk/triple_generator.py @@ -20,7 +20,8 @@ URLValue ) - +BAD_CHARS = [":", "-", "&", ",", " ", + "(", ")", "\'", '\"', "/", "\\", "[", "]", ";"] class TripleGenerator: """ A class to maintain the status of the generator @@ -57,6 +58,7 @@ def __init__( self.yyyy_mm_dd_pattern = re.compile("[12]\d{3}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])") self.yyyy_pattern = re.compile("[12]\d{3}") self.quantity_pattern = re.compile("([\+|\-]?[0-9]+\.?[0-9]*)(?:\[([\+|\-]?[0-9]+\.?[0-9]*),([\+|\-]?[0-9]+\.?[0-9]*)\])?([U|Q](?:[0-9]+))?") + def _node_2_entity(self, node:str): ''' @@ -65,7 +67,7 @@ def _node_2_entity(self, node:str): if node in self.prop_types: entity = WDProperty(node, self.prop_types[node]) else: - entity = WDItem(TripleGenerator.replace_illegal_string(node.upper())) + entity = WDItem(TripleGenerator.replace_illegal_string(node())) return entity @@ -195,7 +197,7 @@ def generate_normal_triple( # determine the edge type edge_type = self.prop_types[label] if edge_type == Item: - object = WDItem(TripleGenerator.replace_illegal_string(node2.upper())) + object = WDItem(TripleGenerator.replace_illegal_string(node2())) elif edge_type == TimeValue: # https://www.wikidata.org/wiki/Help:Dates # ^2013-01-01T00:00:00Z/11 @@ -288,7 +290,7 @@ def generate_normal_triple( # create qualifier edge on previous STATEMENT and return the updated STATEMENT if type(object) == WDItem: self.doc.kg.add_subject(object) - self.to_append_statement.add_qualifier(label.upper(), object) + self.to_append_statement.add_qualifier(label(), object) self.doc.kg.add_subject(self.to_append_statement) #TODO maybe can be positioned better for the edge cases. else: @@ -297,9 +299,9 @@ def generate_normal_triple( if type(object) == WDItem: self.doc.kg.add_subject(object) if self.truthy: - self.to_append_statement = entity.add_truthy_statement(label.upper(), object) + self.to_append_statement = entity.add_truthy_statement(label(), object) else: - self.to_append_statement = entity.add_statement(label.upper(), object) + self.to_append_statement = entity.add_statement(label(), object) self.doc.kg.add_subject(entity) return True @@ -408,4 +410,9 @@ def entry_point(self, line_number:int , edge: str): @staticmethod def replace_illegal_string(s:str)->str: - return s.replace(":","-") \ No newline at end of file + ''' + this function serves as the last gate of keeping illegal characters outside of entity creation. 
+        '''
+        for char in BAD_CHARS:
+            s = s.replace(char,"_")
+        return s
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 36e6ec5bf..7cb0d1c7f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,7 +7,6 @@ sh
 sklearn
 SPARQLWrapper
 tqdm
-langdetect
 rdflib==5.0.0
 etk==2.2.1
 simplejson

From 3616bf4658a19181cab6dc204d244da6c73b2e3f Mon Sep 17 00:00:00 2001
From: Rongpeng
Date: Wed, 29 Apr 2020 16:08:59 -0700
Subject: [PATCH 022/278] fix bug caused by removing upper

---
 kgtk/triple_generator.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py
index ae5316abe..57a32907b 100644
--- a/kgtk/triple_generator.py
+++ b/kgtk/triple_generator.py
@@ -67,7 +67,7 @@ def _node_2_entity(self, node:str):
         if node in self.prop_types:
             entity = WDProperty(node, self.prop_types[node])
         else:
-            entity = WDItem(TripleGenerator.replace_illegal_string(node()))
+            entity = WDItem(TripleGenerator.replace_illegal_string(node))
 
 
@@ -197,7 +197,7 @@ def generate_normal_triple(
         # determine the edge type
         edge_type = self.prop_types[label]
         if edge_type == Item:
-            object = WDItem(TripleGenerator.replace_illegal_string(node2()))
+            object = WDItem(TripleGenerator.replace_illegal_string(node2))
         elif edge_type == TimeValue:
             # https://www.wikidata.org/wiki/Help:Dates
             # ^2013-01-01T00:00:00Z/11
@@ -290,7 +290,7 @@ def generate_normal_triple(
         # create qualifier edge on previous STATEMENT and return the updated STATEMENT
         if type(object) == WDItem:
             self.doc.kg.add_subject(object)
-            self.to_append_statement.add_qualifier(label(), object)
+            self.to_append_statement.add_qualifier(label, object)
             self.doc.kg.add_subject(self.to_append_statement) #TODO maybe can be positioned better for the edge cases.
 
         else:
@@ -299,9 +299,9 @@ def generate_normal_triple(
         if type(object) == WDItem:
             self.doc.kg.add_subject(object)
         if self.truthy:
-            self.to_append_statement = entity.add_truthy_statement(label(), object)
+            self.to_append_statement = entity.add_truthy_statement(label, object)
         else:
-            self.to_append_statement = entity.add_statement(label(), object)
+            self.to_append_statement = entity.add_statement(label, object)
         self.doc.kg.add_subject(entity)
 
         return True

From d414f54efc5e03a85516d6e8a315c59509f2d175 Mon Sep 17 00:00:00 2001
From: Craig Milo Rogers
Date: Wed, 29 Apr 2020 16:55:43 -0700
Subject: [PATCH 023/278] Add the iso 639 library.

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 439daf31e..42c81a774 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,3 +13,4 @@ etk==2.2.1
 simplejson
 pyrallel.lib
 attrs
+iso-639

From 5a89adbd0c2a41355b4876b5c75d859883987356 Mon Sep 17 00:00:00 2001
From: Craig Milo Rogers
Date: Thu, 30 Apr 2020 12:51:26 -0700
Subject: [PATCH 024/278] Add KGTK File data types. Add boolean values. Mark
 certain header complaints as warnings. Add a KGTK data type validator.
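
As a quick orientation, a hedged usage sketch of the validator this patch
introduces, assuming the kgtk/join/kgtkvalue.py layout shown in the diff
below; the sample values are chosen to exercise several KGTK data types:

    from kgtk.join.kgtkvalue import KgtkValue

    # Each value is classified by describe() and checked by is_valid().
    for v in ['"hello"', "'maison'@fr", "@043.26193/010.92708", "10.4e10"]:
        kv = KgtkValue(v)
        print("%s: %s (valid: %s)" % (v, kv.describe(), kv.is_valid()))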
---
 kgtk/join/kgtkformat.py |  16 +-
 kgtk/join/kgtkvalue.py  | 372 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 385 insertions(+), 3 deletions(-)
 create mode 100644 kgtk/join/kgtkvalue.py

diff --git a/kgtk/join/kgtkformat.py b/kgtk/join/kgtkformat.py
index 760589d82..69b28dd6e 100644
--- a/kgtk/join/kgtkformat.py
+++ b/kgtk/join/kgtkformat.py
@@ -3,6 +3,7 @@
 
 """
 
+from enum import Enum
 import sys
 import typing
 
@@ -21,6 +22,15 @@ class KgtkFormat:
     # There is only one required column in a node file:
     ID_COLUMN_NAMES: typing.List[str] = ["id", "ID"]
 
+    class DataTypes(Enum):
+        NUMBER = 0
+        STRING = 1
+        STRUCTURED_LITERAL = 2
+        SYMBOL = 3
+
+    TRUE_SYMBOL: str = "True"
+    FALSE_SYMBOL: str = "False"
+
     @classmethod
     def _yelp(cls,
               msg: str,
@@ -96,11 +106,11 @@ def check_column_name(cls,
             if ''.join(column_name.split()) != column_name.strip():
                 results.append("Column name '%s' contains internal white space" % column_name)
         if "," in column_name:
-            results.append("Column name '%s' contains a comma (,)" % column_name)
+            results.append("Warning: Column name '%s' contains a comma (,)" % column_name)
         if "|" in column_name:
-            results.append("Column name '%s' contains a vertical bar (|)" % column_name)
+            results.append("Warning: Column name '%s' contains a vertical bar (|)" % column_name)
         if ";" in column_name:
-            results.append("Column name '%s' contains a semicolon (;)" % column_name)
+            results.append("Warning: Column name '%s' contains a semicolon (;)" % column_name)
 
         return results
 
diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py
new file mode 100644
index 000000000..27435b6e7
--- /dev/null
+++ b/kgtk/join/kgtkvalue.py
@@ -0,0 +1,372 @@
+"""
+Constants and helpers for the KGTK file format.
+
+"""
+
+from argparse import ArgumentParser
+import attr
+from iso639 import languages # type: ignore
+import re
+import sys
+import typing
+
+from kgtk.join.kgtkformat import KgtkFormat
+
+@attr.s(slots=True, frozen=False)
+class KgtkValue(KgtkFormat):
+    value: str = attr.ib(validator=attr.validators.instance_of(str))
+
+
+    split_list_re: typing.Pattern = re.compile(r"(?<!\\)\|")
+
+    values: typing.Optional[typing.List[str]] = attr.ib(default=None)
+
+    def get_list(self)->typing.List[str]:
+        if self.values is None:
+            self.values = KgtkValue.split_list_re.split(self.value)
+        return self.values
+
+    def get_item(self, idx: typing.Optional[int])-> str:
+        if idx is None:
+            return self.value
+        else:
+            return self.get_list()[idx]
+
+    def is_list(self)->bool:
+        return len(self.get_list()) > 1
+
+    def get_values(self)->typing.List['KgtkValue']:
+        """
+        Convert the value into a list of KgtkValues.
+        """
+        if not self.is_list():
+            return [ self ]
+        else:
+            result: typing.List['KgtkValue'] = [ ]
+            v: str
+            for v in self.get_list():
+                result.append(KgtkValue(v))
+            return result
+
+    def is_empty(self, idx: typing.Optional[int] = None)->bool:
+        """
+        Return False if this value is a list and idx is None.
+        Otherwise, return True if the value is empty.
+        """
+        if self.is_list() and idx is None:
+            return False
+
+        v: str = self.get_item(idx)
+        return len(v) == 0
+
+    def is_number(self, idx: typing.Optional[int] = None)->bool:
+        """
+        Return False if this value is a list and idx is None.
+        Otherwise, return True if the first character is 0-9,_,-,. .
+        """
+        if self.is_list() and idx is None:
+            return False
+
+        v: str = self.get_item(idx)
+        return v.startswith(("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "+", "-", "."))
+
+    def is_valid_number(self, idx: typing.Optional[int] = None)->bool:
+        """
+        Return False if this value is a list and idx is None.
+        Otherwise, return True if the first character is 0-9,_,-,.
+        and Python can parse it.
+        """
+        if self.is_list() and idx is None:
+            return False
+
+        v: str = self.get_item(idx)
+        if not v.startswith(("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "+", "-", ".")):
+            return False
+        try:
+            i: int = int(v, 0) # The 0 allows prefixes: 0b, 0o, and 0x.
+            return True
+        except ValueError:
+            try:
+                f: float = float(v)
+                return True
+            except ValueError:
+                return False
+
+
+    def is_string(self, idx: typing.Optional[int] = None)->bool:
+        """
+        Return False if this value is a list and idx is None.
+        Otherwise, return True if the first character is '"'.
+        """
+        if self.is_list() and idx is None:
+            return False
+
+        v: str = self.get_item(idx)
+        return v.startswith('"')
+
+    string_re: typing.Pattern = re.compile(r'^"(?:[^"]|\\.)*"$')
+
+    def is_valid_string(self, idx: typing.Optional[int] = None)->bool:
+        """
+        Return False if this value is a list and idx is None.
+        Otherwise, return True if the first character is '"',
+        the last character is '"', and the only internal '"' is
+        escaped by backslash.
+        """
+        if self.is_list() and idx is None:
+            return False
+
+        v: str = self.get_item(idx)
+        if not v.startswith('"'):
+            return False
+        m: typing.Optional[typing.Match] = KgtkValue.string_re.match(v)
+        return m is not None
+
+    def is_structured_literal(self, idx: typing.Optional[int] = None)->bool:
+        """
+        Return False if this value is a list and idx is None.
+        Otherwise, return True if the first character is ^@'!.
+        """
+        if self.is_list() and idx is None:
+            return False
+
+        v: str = self.get_item(idx)
+        return v.startswith(("^", "@", "'", "!"))
+
+    def is_symbol(self, idx: typing.Optional[int] = None)->bool:
+        """
+        Return False if this value is a list and idx is None.
+        Otherwise, return True if not a number, string, nor structured literal.
+        """
+        if self.is_list() and idx is None:
+            return False
+
+        return not (self.is_number(idx) or self.is_string(idx) or self.is_structured_literal(idx))
+
+    def is_boolean(self, idx: typing.Optional[int] = None)->bool:
+        """
+        Return False if this value is a list and idx is None.
+        Otherwise, return True if the value matches one of the special boolean symbols..
+        """
+        if self.is_list() and idx is None:
+            return False
+
+        v: str = self.get_item(idx)
+        return v == KgtkFormat.TRUE_SYMBOL or v == KgtkFormat.FALSE_SYMBOL
+
+
+    def is_language_qualified_string(self, idx: typing.Optional[int] = None)->bool:
+        """
+        Return False if this value is a list and idx is None.
+        Otherwise, return True if the first character is '
+        """
+        if self.is_list() and idx is None:
+            return False
+
+        v: str = self.get_item(idx)
+        return v.startswith("'")
+
+    language_qualified_string_re: typing.Pattern = re.compile(r"^(?P<string>'(?:[^']|\\.)*')@(?P<lang>[a-zA-Z][a-zA-Z])$")
+
+    def is_valid_language_qualified_string(self, idx: typing.Optional[int] = None)->bool:
+        """
+        Return False if this value is a list and idx is None.
+        Otherwise, return True if the value looks like a language-qualified string.
+        """
+        if self.is_list() and idx is None:
+            return False
+
+        v: str = self.get_item(idx)
+        m: typing.Optional[typing.Match] = KgtkValue.language_qualified_string_re.match(v)
+        if m is None:
+            return False
+
+        # Validate the language code:
+        lang: str = m.group("lang")
+        # print("lang: %s" % lang)
+        try:
+            languages.get(alpha2=lang.lower())
+            return True
+        except KeyError:
+            return False
+
+    def is_location_coordinates(self, idx: typing.Optional[int] = None)->bool:
+        """
+        Return False if this value is a list and idx is None.
+        Otherwise, return True if the first character is @
+        """
+        if self.is_list() and idx is None:
+            return False
+
+        v: str = self.get_item(idx)
+        return v.startswith("@")
+
+    location_coordinates_re: typing.Pattern = re.compile(r"^@[-+]?\d{3}\.\d{5}/[-+]?\d{3}\.\d{5}$")
+
+    def is_valid_location_coordinates(self, idx: typing.Optional[int] = None)->bool:
+        """
+        Return False if this value is a list and idx is None.
+        Otherwise, return True if the value looks like valid location coordinates.
+        """
+        if self.is_list() and idx is None:
+            return False
+
+        v: str = self.get_item(idx)
+        m: typing.Optional[typing.Match] = KgtkValue.location_coordinates_re.match(v)
+        return m is not None
+
+    def is_date_and_times(self, idx: typing.Optional[int] = None)->bool:
+        """
+        Return False if this value is a list and idx is None.
+        Otherwise, return True if the first character is ^
+        """
+        if self.is_list() and idx is None:
+            return False
+
+        v: str = self.get_item(idx)
+        return v.startswith("^")
+
+    date_and_times_re: typing.Pattern = re.compile(r"^\^(?P<year>[0-9]{4})(?P<hyphen>-)?(?P<month>1[0-2]|0[1-9])(?(hyphen)-)(?P<day>3[01]|0[1-9]|[12][0-9])T(?P<hour>2[0-3]|[01][0-9])(?(hyphen):)(?P<minutes>[0-5][0-9])(?(hyphen):)(?P<seconds>[0-5][0-9])(?P<zone>Z|\+[0-9][0-9](?::[0-9][0-9])?)?(?P<precision>/[0-9])?$")
+
+    def is_valid_date_and_times(self, idx: typing.Optional[int] = None)->bool:
+        """
+        Return False if this value is a list and idx is None.
+        Otherwise, return True if the value looks like valid date and times
+        literal based on ISO-8601.
+        """
+        if self.is_list() and idx is None:
+            return False
+
+        v: str = self.get_item(idx)
+        m: typing.Optional[typing.Match] = KgtkValue.date_and_times_re.match(v)
+        return m is not None
+
+    def is_extension(self, idx: typing.Optional[int] = None)->bool:
+        """
+        Return False if this value is a list and idx is None.
+        Otherwise, return True if the first character is !
+        """
+        if self.is_list() and idx is None:
+            return False
+
+        v: str = self.get_item(idx)
+        return v.startswith("!")
+
+
+    def is_valid_literal(self, idx: typing.Optional[int] = None)->bool:
+        """
+        Return False if this value is a list and idx is None.
+        Otherwise, return True if the value looks like a valid literal.
+        """
+        if self.is_list() and idx is None:
+            return False
+
+        if self.is_string(idx):
+            return self.is_valid_string(idx)
+        elif self.is_number(idx):
+            return self.is_valid_number(idx)
+        elif self.is_structured_literal(idx):
+            if self.is_language_qualified_string(idx):
+                return self.is_valid_language_qualified_string(idx)
+            elif self.is_location_coordinates(idx):
+                return self.is_valid_location_coordinates(idx)
+            elif self.is_date_and_times(idx):
+                return self.is_valid_date_and_times(idx)
+            elif self.is_extension(idx):
+                return False # no validation presently available.
+            else:
+                return False # Quantities will reach here at present.
+        else:
+            return False
+
+    def is_valid_item(self, idx: typing.Optional[int] = None)->bool:
+        if self.is_list() and idx is None:
+            return False
+
+        if self.is_empty(idx):
+            return True
+        elif self.is_valid_literal(idx):
+            return True
+        else:
+            return self.is_symbol(idx) # Should always be True
+
+    def is_valid(self)->bool:
+        """
+        Is this a valid KGTK cell value? If the value is a list, are all the
+        components valid?
+        """
+        result: bool = True
+        kv: KgtkValue
+        for kv in self.get_values():
+            result = result and kv.is_valid_item()
+        return result
+
+    def describe(self, idx: typing.Optional[int] = None)->str:
+        """
+        Return False if this value is a list and idx is None.
+        Otherwise, return a string that descrubes the value.
+        """
+        if self.is_list() and idx is None:
+            result: str = ""
+            kv: KgtkValue
+            first: bool = True
+            for kv in self.get_values():
+                if first:
+                    first = not first
+                else:
+                    result += KgtkFormat.LIST_SEPARATOR
+                result += kv.describe()
+            return result
+
+        if self.is_empty(idx):
+            return "Empty"
+        elif self.is_string(idx):
+            if self.is_valid_string(idx):
+                return "String"
+            else:
+                return "Invalid String"
+        elif self.is_number(idx):
+            if self.is_valid_number(idx):
+                return "Number"
+            else:
+                return "Invalid Number"
+        elif self.is_structured_literal(idx):
+            if self.is_language_qualified_string(idx):
+                if self.is_valid_language_qualified_string(idx):
+                    return "Language Qualified String"
+                else:
+                    return "Invalid Language Qualified String"
+            elif self.is_location_coordinates(idx):
+                if self.is_valid_location_coordinates(idx):
+                    return "Location Coordinates"
+                else:
+                    return "Invalid Location Coordinates"
+            elif self.is_date_and_times(idx):
+                if self.is_valid_date_and_times(idx):
+                    return "Date and Times"
+                else:
+                    return "Invalid Date and Times"
+            elif self.is_extension(idx):
+                return "Extension (unvalidated)"
+            else:
+                return "Invalid Structured Literal"
+        else:
+            return "Symbol"
+
+def main():
+    """
+    Test the KGTK value parser.
+    """
+    parser = ArgumentParser()
+    parser.add_argument(dest="values", help="The value(s) to test", type=str, nargs="+")
+    parser.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true')
+    parser.add_argument( "--very-verbose", dest="very_verbose", help="Print additional progress messages.", action='store_true')
+    args = parser.parse_args()
+
+    value: str
+    for value in args.values:
+        print("%s: %s" % (value, KgtkValue(value).describe()))
+
+if __name__ == "__main__":
+    main()

From b2574dbd463ec5ed4b8a954be6993c37c5bf2b53 Mon Sep 17 00:00:00 2001
From: Craig Milo Rogers
Date: Thu, 30 Apr 2020 12:52:56 -0700
Subject: [PATCH 025/278] Fix a comment.

---
 kgtk/join/kgtkvalue.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py
index 27435b6e7..34023a4ba 100644
--- a/kgtk/join/kgtkvalue.py
+++ b/kgtk/join/kgtkvalue.py
@@ -304,8 +304,7 @@ def describe(self, idx: typing.Optional[int] = None)->str:
 
     def describe(self, idx: typing.Optional[int] = None)->str:
         """
-        Return False if this value is a list and idx is None.
-        Otherwise, return a string that descrubes the value.
+        Return a string that describes the value.
         """
         if self.is_list() and idx is None:
             result: str = ""

From 7e6e21b85e93ce9c6636a0efbcfc9d60134c1030 Mon Sep 17 00:00:00 2001
From: Craig Milo Rogers
Date: Thu, 30 Apr 2020 13:15:13 -0700
Subject: [PATCH 026/278] Document the number and string formats. Validate the
 coordinates better.

---
 kgtk/join/kgtkvalue.py | 59 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 54 insertions(+), 5 deletions(-)

diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py
index 34023a4ba..c9369efd6 100644
--- a/kgtk/join/kgtkvalue.py
+++ b/kgtk/join/kgtkvalue.py
@@ -1,5 +1,7 @@
 """
-Constants and helpers for the KGTK file format.
+Validate KGTK File data types.
+
+Dimensioned quantities are not supported.
 
 """
 
@@ -76,6 +78,20 @@ def is_valid_number(self, idx: typing.Optional[int] = None)->bool:
         Return False if this value is a list and idx is None.
         Otherwise, return True if the first character is 0-9,_,-,.
         and Python can parse it.
+
+        Examples:
+        1
+        123
+        -123
+        +123
+        0b101
+        0o277
+        0x24F
+        .4
+        0.4
+        10.
+        10.4
+        10.4e10
         """
         if self.is_list() and idx is None:
             return False
 
@@ -98,6 +114,11 @@ def is_string(self, idx: typing.Optional[int] = None)->bool:
         """
         Return False if this value is a list and idx is None.
         Otherwise, return True if the first character is '"'.
+
+        Strings begin and end with double quote ("). Any internal double
+        quotes must be escaped with backslash (\"). Triple-double quoted
+        strings are not supported by KGTK File Format v2.
+
         """
         if self.is_list() and idx is None:
             return False
@@ -111,8 +132,8 @@ def is_valid_string(self, idx: typing.Optional[int] = None)->bool:
         """
         Return False if this value is a list and idx is None.
         Otherwise, return True if the first character is '"',
-        the last character is '"', and the only internal '"' is
-        escaped by backslash.
+        the last character is '"', and any internal '"' characters are
+        escaped by backslashes.
         """
         if self.is_list() and idx is None:
             return False
@@ -202,19 +223,45 @@ def is_location_coordinates(self, idx: typing.Optional[int] = None)->bool:
         v: str = self.get_item(idx)
         return v.startswith("@")
 
-    location_coordinates_re: typing.Pattern = re.compile(r"^@[-+]?\d{3}\.\d{5}/[-+]?\d{3}\.\d{5}$")
+    location_coordinates_re: typing.Pattern = re.compile(r"^@(?P<lat>[-+]?\d{3}\.\d{5})/(?P<lon>[-+]?\d{3}\.\d{5})$")
 
     def is_valid_location_coordinates(self, idx: typing.Optional[int] = None)->bool:
         """
         Return False if this value is a list and idx is None.
         Otherwise, return True if the value looks like valid location coordinates.
+
+        Note: The coordinates must look exactly like the examples in KGTK
+        File Format v2, except for optional +/- characters.
+
+        @043.26193/010.92708
         """
         if self.is_list() and idx is None:
             return False
 
         v: str = self.get_item(idx)
         m: typing.Optional[typing.Match] = KgtkValue.location_coordinates_re.match(v)
-        return m is not None
+        if m is None:
+            return False
+
+        # Latitude runs from -90 to +90
+        latstr: str = m.group("lat")
+        try:
+            lat: float = float(latstr)
+            if lat < -90. or lat > 90.:
+                return False
+        except ValueError:
+            return False
+
+        # Longitude runs from -180 to +180
+        lonstr: str = m.group("lon")
+        try:
+            lon: float = float(lonstr)
+            if lon < -180. or lon > 180.:
+                return False
+        except ValueError:
+            return False
+
+        return True
 
     def is_date_and_times(self, idx: typing.Optional[int] = None)->bool:
         """
@@ -234,6 +281,8 @@ def is_valid_date_and_times(self, idx: typing.Optional[int] = None)->bool:
         Return False if this value is a list and idx is None.
         Otherwise, return True if the value looks like valid date and times
         literal based on ISO-8601.
+
+        TODO: validate the calendar date, eg fail if 31-Apr-2020.
""" if self.is_list() and idx is None: return False From 30341662c195184700b0acefc606cad8616cd56c Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Thu, 30 Apr 2020 13:34:09 -0700 Subject: [PATCH 027/278] added line-by-line option and set up a quick test by running script directly --- kgtk/cli/generate_wikidata_triples.py | 72 +++++++++++++++++++++------ 1 file changed, 56 insertions(+), 16 deletions(-) diff --git a/kgtk/cli/generate_wikidata_triples.py b/kgtk/cli/generate_wikidata_triples.py index 4d57637b7..f3eefd02a 100644 --- a/kgtk/cli/generate_wikidata_triples.py +++ b/kgtk/cli/generate_wikidata_triples.py @@ -97,10 +97,14 @@ def add_arguments(parser): help="if set to yes, read from compressed gz file", dest="use_gz", ) - # logging level - # parser.add_argument('-l', '--logging-level', action='store', dest='logging_level', - # default="info", choices=("error", "warning", "info", "debug"), - # help="set up the logging level, default is INFO level") + parser.add_argument( + "-lbl", + "--line-by-line", + action="store", + type=str2bool, + help="if set to yes, read from standard input line by line, otherwise loads whole file into memory", + dest="line_by_line", + ) def run( @@ -111,7 +115,8 @@ def run( n: int, truthy: bool, ignore: bool, - use_gz: bool + use_gz: bool, + line_by_line: bool, ): # import modules locally import gzip @@ -127,19 +132,54 @@ def run( truthy=truthy ) # process stdin - num_line = 1 if use_gz: fp = gzip.open(sys.stdin.buffer, 'rt') else: fp = sys.stdin - while True: - edge = fp.readline() - if not edge: - break - if edge.startswith("#") or num_line == 1: # TODO First line omit - num_line += 1 - continue - else: - generator.entry_point(num_line, edge) - num_line += 1 + if line_by_line: + print("#line-by-line") + num_line = 1 + while True: + edge = fp.readline() + if not edge: + break + if edge.startswith("#") or num_line == 1: # TODO First line omit + num_line += 1 + continue + else: + generator.entry_point(num_line, edge) + num_line += 1 + else: + # not line by line + print("#not line-by-line") + for num, edge in enumerate(fp.readlines()): + if edge.startswith("#") or num == 0: + continue + else: + generator.entry_point(num+1,edge) generator.finalize() + +# testing profiling locally with direct call + +if __name__ == "__main__": + import gzip + from kgtk.triple_generator import TripleGenerator + import sys + with open("/tmp/gwt.log","w") as dest_fp: + generator = TripleGenerator( + prop_file="/Users/rongpeng/Documents/ISI/Covid19/covid_data/v1.3/heng_props.tsv", + label_set="label", + alias_set="aliases", + description_set="descriptions", + n=10000, + ignore=True, + truthy=True, + dest_fp = dest_fp + ) + with open("/Users/rongpeng/Documents/ISI/Covid19/covid_data/v1.3/kgtk_sample_sorted.tsv","r") as fp: + for num, edge in enumerate(fp.readlines()): + if edge.startswith("#") or num == 0: + continue + else: + generator.entry_point(num+1,edge) + generator.finalize() \ No newline at end of file From 6f10ba6d3952a5bd3d8a65ca87d493b0d9e517fd Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Thu, 30 Apr 2020 13:35:56 -0700 Subject: [PATCH 028/278] added comments about visualization --- kgtk/cli/generate_wikidata_triples.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kgtk/cli/generate_wikidata_triples.py b/kgtk/cli/generate_wikidata_triples.py index f3eefd02a..c87495434 100644 --- a/kgtk/cli/generate_wikidata_triples.py +++ b/kgtk/cli/generate_wikidata_triples.py @@ -160,7 +160,9 @@ def run( generator.finalize() # testing profiling locally with direct call 
- +# pip3 install snakeviz +# run `snakeviz /tmp/tmp.dat` to visualize the call stacks. +# python3 -m cProfile -o /tmp/tmp.dat generate_wikidata_triples.py if __name__ == "__main__": import gzip from kgtk.triple_generator import TripleGenerator From 0995ea077451843af924b4bbb9fb04c480b33401 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Thu, 30 Apr 2020 13:55:48 -0700 Subject: [PATCH 029/278] Add a check for invalid KGTK values. --- kgtk/cli/validate.py | 12 ++++++++++++ kgtk/join/edgereader.py | 7 +++++-- kgtk/join/kgtkformat.py | 5 ++++- kgtk/join/kgtkreader.py | 35 +++++++++++++++++++++++++++++++++-- kgtk/join/nodereader.py | 7 +++++-- 5 files changed, 59 insertions(+), 7 deletions(-) diff --git a/kgtk/cli/validate.py b/kgtk/cli/validate.py index ae480b223..13c91484f 100644 --- a/kgtk/cli/validate.py +++ b/kgtk/cli/validate.py @@ -90,6 +90,10 @@ def add_arguments(parser): parser.add_argument( "--header-only", dest="header_only", help="Process the only the header of the input file.", action="store_true") + parser.add_argument( "--invalid-value-action", dest="invalid_value_action", + help="The action to take when a data cell value is invalid.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.REPORT) + parser.add_argument( "--long-line-action", dest="long_line_action", help="The action to take when a long line is detected.", type=ValidationAction, action=EnumNameAction, default=ValidationAction.COMPLAIN) @@ -106,6 +110,10 @@ def add_arguments(parser): parser.add_argument( "--truncate-long-lines", dest="truncate_long_lines", help="Remove excess trailing columns in long lines.", action='store_true') + parser.add_argument( "--unsafe-column-name-action", dest="unsafe_column_name_action", + help="The action to take when a column name is unsafe.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.REPORT) + parser.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true') parser.add_argument( "--very-verbose", dest="very_verbose", help="Print additional progress messages.", action='store_true') @@ -132,7 +140,9 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], blank_id_line_action: typing.Optional[ValidationAction] = None, short_line_action: ValidationAction = ValidationAction.COMPLAIN, long_line_action: ValidationAction = ValidationAction.COMPLAIN, + invalid_value_action: ValidationAction = ValidationAction.REPORT, header_error_action: ValidationAction = ValidationAction.EXIT, + unsafe_column_name_action: ValidationAction = ValidationAction.REPORT, compression_type: typing.Optional[str] = None, gzip_in_parallel: bool = False, gzip_queue_size: int = KgtkReader.GZIP_QUEUE_SIZE_DEFAULT, @@ -177,7 +187,9 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], blank_id_line_action=blank_id_line_action, short_line_action=short_line_action, long_line_action=long_line_action, + invalid_value_action=invalid_value_action, header_error_action=header_error_action, + unsafe_column_name_action=unsafe_column_name_action, compression_type=compression_type, gzip_in_parallel=gzip_in_parallel, gzip_queue_size=gzip_queue_size, diff --git a/kgtk/join/edgereader.py b/kgtk/join/edgereader.py index 77d654e62..b5ff87ff5 100644 --- a/kgtk/join/edgereader.py +++ b/kgtk/join/edgereader.py @@ -34,6 +34,7 @@ def open_edge_file(cls, blank_node2_line_action: ValidationAction = ValidationAction.EXCLUDE, short_line_action: ValidationAction = ValidationAction.EXCLUDE, 
long_line_action: ValidationAction = ValidationAction.EXCLUDE, + invalid_value_action: ValidationAction = ValidationAction.REPORT, header_error_action: ValidationAction = ValidationAction.EXIT, unsafe_column_name_action: ValidationAction = ValidationAction.REPORT, compression_type: typing.Optional[str] = None, @@ -104,6 +105,7 @@ def open_edge_file(cls, blank_node2_line_action=blank_node2_line_action, short_line_action=short_line_action, long_line_action=long_line_action, + invalid_value_action=invalid_value_action, header_error_action=header_error_action, unsafe_column_name_action=unsafe_column_name_action, compression_type=compression_type, @@ -115,7 +117,7 @@ def open_edge_file(cls, very_verbose=very_verbose, ) - def _ignore_if_blank_fields(self, values: typing.List[str], line: str): + def _ignore_if_blank_fields(self, values: typing.List[str], line: str)->bool: # Ignore line_action with blank node1 fields. This code comes after # filling missing trailing columns, although it could be reworked # to come first. @@ -131,7 +133,7 @@ def _ignore_if_blank_fields(self, values: typing.List[str], line: str): return self.exclude_line(self.blank_node2_line_action, "node2 is blank", line) return False # Do not ignore this line - def _skip_reserved_fields(self, column_name): + def _skip_reserved_fields(self, column_name)->bool: if self.node1_column_idx >= 0 and column_name in self.NODE1_COLUMN_NAMES: return True if self.node2_column_idx >= 0 and column_name in self.NODE2_COLUMN_NAMES: @@ -176,6 +178,7 @@ def main(): blank_node2_line_action=args.blank_node2_line_action, short_line_action=args.short_line_action, long_line_action=args.long_line_action, + invalid_value_action=args.invalid_value_action, header_error_action=args.header_error_action, unsafe_column_name_action=args.unsafe_column_name_action, compression_type=args.compression_type, diff --git a/kgtk/join/kgtkformat.py b/kgtk/join/kgtkformat.py index 69b28dd6e..9ab4612fd 100644 --- a/kgtk/join/kgtkformat.py +++ b/kgtk/join/kgtkformat.py @@ -8,6 +8,7 @@ import typing from kgtk.join.validationaction import ValidationAction +from kgtk.join.kgtkvalue import KgtkValue class KgtkFormat: COLUMN_SEPARATOR: str = "\t" @@ -111,8 +112,10 @@ def check_column_name(cls, results.append("Warning: Column name '%s' contains a vertical bar (|)" % column_name) if ";" in column_name: results.append("Warning: Column name '%s' contains a semicolon (;)" % column_name) + kv: KgtkValue = KgtkValue(column_name) + if not kv.is_valid(): + results.append(kv.describe()) return results - @classmethod def check_column_names(cls, diff --git a/kgtk/join/kgtkreader.py b/kgtk/join/kgtkreader.py index a040891cd..710703ded 100644 --- a/kgtk/join/kgtkreader.py +++ b/kgtk/join/kgtkreader.py @@ -20,6 +20,7 @@ from kgtk.join.enumnameaction import EnumNameAction from kgtk.join.gzipprocess import GunzipProcess from kgtk.join.kgtkformat import KgtkFormat +from kgtk.join.kgtkvalue import KgtkValue from kgtk.join.validationaction import ValidationAction @attr.s(slots=True, frozen=False) @@ -79,6 +80,9 @@ class KgtkReader(KgtkFormat, ClosableIter[typing.List[str]]): header_error_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.EXIT) unsafe_column_name_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.REPORT) + # Validate data cell values? 
+    invalid_value_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.REPORT)
+
     # Repair records with too many or too few fields?
     fill_short_lines: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False)
     truncate_long_lines: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False)
@@ -122,6 +126,7 @@ def open(cls,
              blank_id_line_action: typing.Optional[ValidationAction] = None,
              short_line_action: ValidationAction = ValidationAction.EXCLUDE,
              long_line_action: ValidationAction = ValidationAction.EXCLUDE,
+             invalid_value_action: ValidationAction = ValidationAction.REPORT,
              header_error_action: ValidationAction = ValidationAction.EXIT,
              unsafe_column_name_action: ValidationAction = ValidationAction.REPORT,
              compression_type: typing.Optional[str] = None,
@@ -238,6 +243,7 @@ def open(cls,
                        blank_id_line_action=blank_id_line_action,
                        short_line_action=short_line_action,
                        long_line_action=long_line_action,
+                       invalid_value_action=invalid_value_action,
                        header_error_action=header_error_action,
                        unsafe_column_name_action=unsafe_column_name_action,
                        compression_type=compression_type,
@@ -291,6 +297,7 @@ def open(cls,
                        blank_id_line_action=blank_id_line_action,
                        short_line_action=short_line_action,
                        long_line_action=long_line_action,
+                       invalid_value_action=invalid_value_action,
                        header_error_action=header_error_action,
                        unsafe_column_name_action=unsafe_column_name_action,
                        compression_type=compression_type,
@@ -330,6 +337,7 @@ def open(cls,
                        blank_id_line_action=blank_id_line_action,
                        short_line_action=short_line_action,
                        long_line_action=long_line_action,
+                       invalid_value_action=invalid_value_action,
                        header_error_action=header_error_action,
                        unsafe_column_name_action=unsafe_column_name_action,
                        compression_type=compression_type,
@@ -549,6 +557,10 @@ def __next__(self)-> typing.List[str]:
             if self._ignore_if_blank_fields(values, line):
                 continue
 
+            if self.invalid_value_action != ValidationAction.PASS:
+                if self._ignore_invalid_values(values, line):
+                    continue
+
             self.data_lines_passed += 1
             if self.very_verbose:
                 sys.stdout.write(".")
@@ -556,12 +568,26 @@ def __next__(self)-> typing.List[str]:
 
             return values
 
+    def _ignore_invalid_values(self, values: typing.List[str], line: str)->bool:
+        value: str
+        idx: int
+        problems: typing.List[str] = [ ]
+        for idx, value in enumerate(values):
+            kv: KgtkValue = KgtkValue(value)
+            if not kv.is_valid():
+                problems.append("%s: %s" % (self.column_names[idx], kv.describe()))
+        if len(problems) > 0 and self.exclude_line(self.invalid_value_action,
+                                                   "; ".join(problems),
+                                                   line):
+            return True
+        return False
+
     # May be overridden
-    def _ignore_if_blank_fields(self, values: typing.List[str], line: str):
+    def _ignore_if_blank_fields(self, values: typing.List[str], line: str)->bool:
         return False
 
     # May be overridden
-    def _skip_reserved_fields(self, column_name):
+    def _skip_reserved_fields(self, column_name)->bool:
         return False
 
     def additional_column_names(self)->typing.List[str]:
@@ -642,6 +668,10 @@ def add_shared_arguments(cls, parser: ArgumentParser):
                             help="The action to take when a header error is detected Only ERROR or EXIT are supported.",
                             type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXIT)
 
+        parser.add_argument( "--invalid-value-action", dest="invalid_value_action",
+                             help="The action to take when a data cell value is invalid.",
+                             type=ValidationAction, action=EnumNameAction, default=ValidationAction.REPORT)
+
        parser.add_argument( "--long-line-action", dest="long_line_action",
help="The action to take when a long line is detected.", type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) @@ -709,6 +739,7 @@ def main(): blank_id_line_action=args.blank_id_line_action, short_line_action=args.short_line_action, long_line_action=args.long_line_action, + invalid_value_action=args.invalid_value_action, header_error_action=args.header_error_action, unsafe_column_name_action=args.unsafe_column_name_action, compression_type=args.compression_type, diff --git a/kgtk/join/nodereader.py b/kgtk/join/nodereader.py index 327150fb1..a6fdcae07 100644 --- a/kgtk/join/nodereader.py +++ b/kgtk/join/nodereader.py @@ -33,6 +33,7 @@ def open_node_file(cls, blank_id_line_action: ValidationAction = ValidationAction.EXCLUDE, short_line_action: ValidationAction = ValidationAction.EXCLUDE, long_line_action: ValidationAction = ValidationAction.EXCLUDE, + invalid_value_action: ValidationAction = ValidationAction.REPORT, header_error_action: ValidationAction = ValidationAction.EXIT, unsafe_column_name_action: ValidationAction = ValidationAction.REPORT, compression_type: typing.Optional[str] = None, @@ -95,6 +96,7 @@ def open_node_file(cls, blank_id_line_action=blank_id_line_action, short_line_action=short_line_action, long_line_action=long_line_action, + invalid_value_action=invalid_value_action, header_error_action=header_error_action, unsafe_column_name_action=unsafe_column_name_action, compression_type=compression_type, @@ -106,7 +108,7 @@ def open_node_file(cls, very_verbose=very_verbose, ) - def _ignore_if_blank_fields(self, values: typing.List[str], line: str): + def _ignore_if_blank_fields(self, values: typing.List[str], line: str)->bool: # Ignore line_action with blank id fields. This code comes after # filling missing trailing columns, although it could be reworked # to come first. @@ -116,7 +118,7 @@ def _ignore_if_blank_fields(self, values: typing.List[str], line: str): return self.exclude_line(self.blank_id_line_action, "id is blank", line) return False # Do not ignore this line - def _skip_reserved_fields(self, column_name): + def _skip_reserved_fields(self, column_name)->bool: if self.id_column_idx >= 0 and column_name in self.ID_COLUMN_NAMES: return True return False @@ -153,6 +155,7 @@ def main(): blank_id_line_action=args.blank_id_line_action, short_line_action=args.short_line_action, long_line_action=args.long_line_action, + invalid_value_action=args.invalid_value_action, header_error_action=args.header_error_action, unsafe_column_name_action=args.unsafe_column_name_action, compression_type=args.compression_type, From 4570aa94b4c45b28ed79666bf85ff6c23b80cba5 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Thu, 30 Apr 2020 14:02:55 -0700 Subject: [PATCH 030/278] Split kgtkformat.py into kgtkformat.py and kgtkbase.py to avoid a circular import. --- kgtk/join/kgtkbase.py | 204 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100644 kgtk/join/kgtkbase.py diff --git a/kgtk/join/kgtkbase.py b/kgtk/join/kgtkbase.py new file mode 100644 index 000000000..9aaab4e11 --- /dev/null +++ b/kgtk/join/kgtkbase.py @@ -0,0 +1,204 @@ +""" +Constants and helpers for the KGTK file format. 
+
+"""
+
+from enum import Enum
+import sys
+import typing
+
+from kgtk.join.validationaction import ValidationAction
+from kgtk.join.kgtkformat import KgtkFormat
+from kgtk.join.kgtkvalue import KgtkValue
+
+class KgtkBase(KgtkFormat):
+    @classmethod
+    def _yelp(cls,
+              msg: str,
+              header_line: str,
+              error_action: ValidationAction,
+              error_file: typing.TextIO = sys.stderr)->bool:
+        """
+        Take a validation action. Only ERROR is special, all other values are treated as EXIT.
+        """
+        result: bool
+        if error_action == ValidationAction.ERROR:
+            # Immediately raise an exception.
+            raise ValueError("In input header '%s': %s" % (header_line, msg))
+
+        if (error_action in [ValidationAction.REPORT, ValidationAction.COMPLAIN, ValidationAction.EXIT ]):
+            print("In input header '%s': %s" % (header_line, msg), file=error_file)
+        if error_action == ValidationAction.EXIT:
+            sys.exit(1)
+        return error_action in [ValidationAction.PASS, ValidationAction.REPORT]
+
+    @classmethod
+    def get_column_idx(cls,
+                       name_or_aliases: typing.List[str],
+                       column_name_map: typing.Mapping[str, int],
+                       header_line: str,
+                       error_action: ValidationAction,
+                       error_file: typing.TextIO = sys.stderr,
+                       is_optional: bool = False,
+    )->int:
+        """
+        Get the indices of the required column using one of its allowable names.
+        Return -1 if the column is not found and is optional.
+        """
+        found_column_name: str = ""
+        column_idx: int = -1
+        col_name: str
+        for col_name in name_or_aliases:
+            if col_name in column_name_map:
+                if column_idx >= 0:
+                    cls._yelp("Ambiguous required column names %s and %s" % (found_column_name, col_name),
+                              header_line=header_line, error_action=error_action, error_file=error_file)
+                column_idx = column_name_map[col_name]
+                found_column_name = col_name
+        if column_idx < 0 and not is_optional:
+            # TODO: throw a better exception:
+            cls._yelp("Missing required column: %s" % " | ".join(name_or_aliases),
+                      header_line=header_line, error_action=error_action, error_file=error_file)
+        return column_idx
+
+    @classmethod
+    def check_column_name(cls,
+                          column_name: str,
+                          header_line: str,
+                          error_action: ValidationAction,
+                          error_file: typing.TextIO = sys.stderr)->typing.List[str]:
+        # Returns a list of complaints.
+        # Check for valid column names.
+        # 1) Check for leading white space
+        # 2) Check for trailing white space
+        # 3) Check for internal white space
+        # 1) except inside "" and '' quoted strings
+        # 4) Check for commas
+        # 5) Check for vertical bars
+        # 6) Check for semicolons
+        #
+        # TODO: It might be possible to make some of these checks more efficient.
+        results: typing.List[str] = [ ]
+        if column_name.lstrip() != column_name:
+            results.append("Column name '%s' starts with leading white space" % column_name)
+        if column_name.rstrip() != column_name:
+            results.append("Column name '%s' ends with trailing white space" % column_name)
+        if not (column_name.startswith('"') or column_name.startswith("'")):
+            if ''.join(column_name.split()) != column_name.strip():
+                results.append("Column name '%s' contains internal white space" % column_name)
+        if "," in column_name:
+            results.append("Warning: Column name '%s' contains a comma (,)" % column_name)
+        if "|" in column_name:
+            results.append("Warning: Column name '%s' contains a vertical bar (|)" % column_name)
+        if ";" in column_name:
+            results.append("Warning: Column name '%s' contains a semicolon (;)" % column_name)
+        kv: KgtkValue = KgtkValue(column_name)
+        if not kv.is_valid():
+            results.append(kv.describe())
+        return results
+
+    @classmethod
+    def check_column_names(cls,
+                           column_names: typing.List[str],
+                           header_line: str,
+                           error_action: ValidationAction,
+                           error_file: typing.TextIO = sys.stderr)->bool:
+        """
+        Returns True if the column names are OK.
+        """
+        complaints: typing.List[str] = [ ]
+        column_name: str
+        for column_name in column_names:
+            gripes: typing.List[str] = cls.check_column_name(column_name, header_line, error_action, error_file)
+            complaints.extend(gripes)
+        if len(complaints) == 0:
+            return True
+        # take the error action, joining the complaints into a single message.
+        msg = ", ".join(complaints)
+        cls._yelp(msg, header_line=header_line, error_action=error_action, error_file=error_file)
+        return False
+
+    @classmethod
+    def build_column_name_map(cls,
+                              column_names: typing.List[str],
+                              header_line: str,
+                              error_action: ValidationAction,
+                              error_file: typing.TextIO = sys.stderr
+    )->typing.Mapping[str, int]:
+        # Validate the column names and build a map from column name
+        # to column index.
+ column_name_map: typing.MutableMapping[str, int] = { } + column_idx: int = 0 # There may be a more pythonic way to do this + column_name: str + for column_name in column_names: + if column_name is None or len(column_name) == 0: + cls._yelp("Column %d has an invalid name in the file header" % column_idx, + header_line=header_line, error_action=error_action, error_file=error_file) + + # Ensure that columns names are not duplicated: + if column_name in column_name_map: + cls._yelp("Column %d (%s) is a duplicate of column %d" % (column_idx, column_name, column_name_map[column_name]), + header_line=header_line, error_action=error_action, error_file=error_file) + + column_name_map[column_name] = column_idx + column_idx += 1 + return column_name_map + + @classmethod + def required_edge_columns(cls, + column_name_map: typing.Mapping[str, int], + header_line: str, + error_action: ValidationAction, + error_file: typing.TextIO = sys.stderr + )->typing.Tuple[int, int, int]: + # Ensure that the three required columns are present: + node1_column_idx: int = cls.get_column_idx(cls.NODE1_COLUMN_NAMES, column_name_map, + header_line=header_line, error_action=error_action, error_file=error_file) + + node2_column_idx: int = cls.get_column_idx(cls.NODE2_COLUMN_NAMES, column_name_map, + header_line=header_line, error_action=error_action, error_file=error_file) + + label_column_idx: int = cls.get_column_idx(cls.LABEL_COLUMN_NAMES, column_name_map, + header_line=header_line, error_action=error_action, error_file=error_file) + + return (node1_column_idx, node2_column_idx, label_column_idx) + + @classmethod + def required_node_column(cls, + column_name_map: typing.Mapping[str, int], + header_line: str, + error_action: ValidationAction, + error_file: typing.TextIO = sys.stderr + )->int: + # Ensure that the required column is present: + return cls.get_column_idx(cls.ID_COLUMN_NAMES, column_name_map, + header_line=header_line, error_action=error_action, error_file=error_file) + + @classmethod + def additional_edge_columns(cls, column_names: typing.List[str])->typing.List[str]: + """ + Return a list of column names in this file excluding the required columns. + """ + additional_columns: typing.List[str] = [ ] + column_name: str + for column_name in column_names: + if column_name not in KgtkFormat.NODE1_COLUMN_NAMES and \ + column_name not in KgtkFormat.NODE2_COLUMN_NAMES and \ + column_name not in KgtkFormat.LABEL_COLUMN_NAMES: + additional_columns.append(column_name) + return additional_columns + + @classmethod + def additional_node_columns(cls, + column_names: typing.List[str], + )->typing.List[str]: + """ + Return a list of column names in this file excluding the required columns. + """ + additional_columns: typing.List[str] = [ ] + column_name: str + for column_name in column_names: + if column_name not in KgtkFormat.ID_COLUMN_NAMES: + additional_columns.append(column_name) + return additional_columns + From 74b2b0b545e3f535867786a712bb937af64ce976 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Thu, 30 Apr 2020 14:06:56 -0700 Subject: [PATCH 031/278] Add missing column index. 
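
A hedged sketch of how the KgtkBase header helpers introduced by this split
fit together; the module paths and ValidationAction.EXIT come from the diffs
above, while the sample header line is illustrative:

    from kgtk.join.kgtkbase import KgtkBase
    from kgtk.join.validationaction import ValidationAction

    header = "node1\tlabel\tnode2"
    columns = header.split("\t")
    # Map each column name to its index; duplicates trigger the error action.
    name_map = KgtkBase.build_column_name_map(columns, header_line=header,
                                              error_action=ValidationAction.EXIT)
    # Locate the three required edge-file columns.
    node1_idx, node2_idx, label_idx = KgtkBase.required_edge_columns(
        name_map, header_line=header, error_action=ValidationAction.EXIT)
    print(node1_idx, node2_idx, label_idx)  # 0 2 1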
--- kgtk/join/kgtkformat.py | 194 ---------------------------------------- kgtk/join/kgtkreader.py | 8 +- kgtk/join/kgtkwriter.py | 3 +- 3 files changed, 7 insertions(+), 198 deletions(-) diff --git a/kgtk/join/kgtkformat.py b/kgtk/join/kgtkformat.py index 9ab4612fd..6c392fe82 100644 --- a/kgtk/join/kgtkformat.py +++ b/kgtk/join/kgtkformat.py @@ -7,9 +7,6 @@ import sys import typing -from kgtk.join.validationaction import ValidationAction -from kgtk.join.kgtkvalue import KgtkValue - class KgtkFormat: COLUMN_SEPARATOR: str = "\t" COMMENT_INDICATOR: str = "#" @@ -31,194 +28,3 @@ class DataTypes(Enum): TRUE_SYMBOL: str = "True" FALSE_SYMBOL: str = "False" - - @classmethod - def _yelp(cls, - msg: str, - header_line: str, - error_action: ValidationAction, - error_file: typing.TextIO = sys.stderr)->bool: - """ - Take a validation action. Only ERROR is special, all other values are treated as EXIT. - """ - result: bool - if error_action == ValidationAction.ERROR: - # Immediately raise an exception. - raise ValueError("In input header'%s': %s" % (header_line, msg)) - - if (error_action in [ValidationAction.REPORT, ValidationAction.COMPLAIN, ValidationAction.EXIT ]): - print("In input header '%s': %s" % (header_line, msg), file=error_file) - if error_action == ValidationAction.EXIT: - sys.exit(1) - return error_action in [ValidationAction.PASS, ValidationAction.REPORT] - - @classmethod - def get_column_idx(cls, - name_or_aliases: typing.List[str], - column_name_map: typing.Mapping[str, int], - header_line: str, - error_action: ValidationAction, - error_file: typing.TextIO = sys.stderr, - is_optional: bool = False, - )->int: - """ - Get the indices of the required column using one of its allowable names. - Return -1 if the column is not found and is optional. - """ - found_column_name: str = "" - column_idx: int = -1 - col_name: str - for col_name in name_or_aliases: - if col_name in column_name_map: - if column_idx >= 0: - cls._yelp("Ambiguous required column names %s and %s" % (found_column_name, col_name), - header_line=header_line, error_action=error_action, error_file=error_file) - column_idx = column_name_map[col_name] - found_column_name = col_name - if column_idx < 0 and not is_optional: - # TODO: throw a better exception: - cls._yelp("Missing required column: %s" % " | ".join(name_or_aliases), - header_line=header_line, error_action=error_action, error_file=error_file) - return column_idx - - @classmethod - def check_column_name(cls, - column_name: str, - header_line: str, - error_action: ValidationAction, - error_file: typing.TextIO = sys.stderr)->typing.List[str]: - # Returns a list of complaints. - # Check for valid column names. - # 1) Check for leading white space - # 2) Check for trailing white space - # 3) Check for internal white space - # 1) except inside "" and '' quoted strings - # 4) Check for commas - # 5) Check for vertical bars - # 6) Check for semicolons - # - # TODO: It might be possible to make some of these checks more efficient. 
- results: typing.List[str] = [ ] - if column_name.lstrip() != column_name: - results.append("Column name '%s' starts with leading white space" % column_name) - if column_name.rstrip() != column_name: - results.append("Column name '%s' ends with leading white space" % column_name) - if not (column_name.startswith('"') or column_name.startswith("'")): - if ''.join(column_name.split()) != column_name.strip(): - results.append("Column name '%s' contains internal white space" % column_name) - if "," in column_name: - results.append("Warning: Column name '%s' contains a comma (,)" % column_name) - if "|" in column_name: - results.append("Warning: Column name '%s' contains a vertical bar (|)" % column_name) - if ";" in column_name: - results.append("Warning: Column name '%s' contains a semicolon (;)" % column_name) - kv: KgtkValue = KgtkValue(column_name) - if not kv.is_valid(): - results.append(kv.describe()) - return results - - @classmethod - def check_column_names(cls, - column_names: typing.List[str], - header_line: str, - error_action: ValidationAction, - error_file: typing.TextIO = sys.stderr)->bool: - """ - Returns True if the column names are OK. - """ - complaints: typing.List[str] = [ ] - column_name: str - for column_name in column_names: - gripes: typing.List[str] = cls.check_column_name(column_name, header_line, error_action, error_file) - complaints.extend(gripes) - if len(complaints) == 0: - return True - # take the error action, joining the complaints into a single message. - msg = ", ".join(complaints) - cls._yelp(msg, header_line=header_line, error_action=error_action, error_file=error_file) - return False - - @classmethod - def build_column_name_map(cls, - column_names: typing.List[str], - header_line: str, - error_action: ValidationAction, - error_file: typing.TextIO = sys.stderr - )->typing.Mapping[str, int]: - # Validate the column names and build a map from column name - # to column index. 
- column_name_map: typing.MutableMapping[str, int] = { } - column_idx: int = 0 # There may be a more pythonic way to do this - column_name: str - for column_name in column_names: - if column_name is None or len(column_name) == 0: - cls._yelp("Column %d has an invalid name in the file header" % column_idx, - header_line=header_line, error_action=error_action, error_file=error_file) - - # Ensure that columns names are not duplicated: - if column_name in column_name_map: - cls._yelp("Column %d (%s) is a duplicate of column %d" % (column_idx, column_name, column_name_map[column_name]), - header_line=header_line, error_action=error_action, error_file=error_file) - - column_name_map[column_name] = column_idx - column_idx += 1 - return column_name_map - - @classmethod - def required_edge_columns(cls, - column_name_map: typing.Mapping[str, int], - header_line: str, - error_action: ValidationAction, - error_file: typing.TextIO = sys.stderr - )->typing.Tuple[int, int, int]: - # Ensure that the three required columns are present: - node1_column_idx: int = cls.get_column_idx(cls.NODE1_COLUMN_NAMES, column_name_map, - header_line=header_line, error_action=error_action, error_file=error_file) - - node2_column_idx: int = cls.get_column_idx(cls.NODE2_COLUMN_NAMES, column_name_map, - header_line=header_line, error_action=error_action, error_file=error_file) - - label_column_idx: int = cls.get_column_idx(cls.LABEL_COLUMN_NAMES, column_name_map, - header_line=header_line, error_action=error_action, error_file=error_file) - - return (node1_column_idx, node2_column_idx, label_column_idx) - - @classmethod - def required_node_column(cls, - column_name_map: typing.Mapping[str, int], - header_line: str, - error_action: ValidationAction, - error_file: typing.TextIO = sys.stderr - )->int: - # Ensure that the required column is present: - return cls.get_column_idx(cls.ID_COLUMN_NAMES, column_name_map, - header_line=header_line, error_action=error_action, error_file=error_file) - - @classmethod - def additional_edge_columns(cls, column_names: typing.List[str])->typing.List[str]: - """ - Return a list of column names in this file excluding the required columns. - """ - additional_columns: typing.List[str] = [ ] - column_name: str - for column_name in column_names: - if column_name not in KgtkFormat.NODE1_COLUMN_NAMES and \ - column_name not in KgtkFormat.NODE2_COLUMN_NAMES and \ - column_name not in KgtkFormat.LABEL_COLUMN_NAMES: - additional_columns.append(column_name) - return additional_columns - - @classmethod - def additional_node_columns(cls, - column_names: typing.List[str], - )->typing.List[str]: - """ - Return a list of column names in this file excluding the required columns. 
- """ - additional_columns: typing.List[str] = [ ] - column_name: str - for column_name in column_names: - if column_name not in KgtkFormat.ID_COLUMN_NAMES: - additional_columns.append(column_name) - return additional_columns - diff --git a/kgtk/join/kgtkreader.py b/kgtk/join/kgtkreader.py index 710703ded..0ee76ef03 100644 --- a/kgtk/join/kgtkreader.py +++ b/kgtk/join/kgtkreader.py @@ -19,12 +19,13 @@ from kgtk.join.closableiter import ClosableIter, ClosableIterTextIOWrapper from kgtk.join.enumnameaction import EnumNameAction from kgtk.join.gzipprocess import GunzipProcess +from kgtk.join.kgtkbase import KgtkBase from kgtk.join.kgtkformat import KgtkFormat from kgtk.join.kgtkvalue import KgtkValue from kgtk.join.validationaction import ValidationAction @attr.s(slots=True, frozen=False) -class KgtkReader(KgtkFormat, ClosableIter[typing.List[str]]): +class KgtkReader(KgtkBase, ClosableIter[typing.List[str]]): ERROR_LIMIT_DEFAULT: int = 1000 GZIP_QUEUE_SIZE_DEFAULT: int = GunzipProcess.GZIP_QUEUE_SIZE_DEFAULT @@ -576,6 +577,7 @@ def _ignore_invalid_values(self, values: typing.List[str], line: str)->bool: kv: KgtkValue = KgtkValue(value) if not kv.is_valid(): problems.append("%s: %s" % (self.column_names[idx], kv.describe())) + idx += 1 if len(problems) > 0 and self.exclude_line(self.invalid_value_action, "; ".join(problems), line): @@ -592,9 +594,9 @@ def _skip_reserved_fields(self, column_name)->bool: def additional_column_names(self)->typing.List[str]: if self.is_edge_file: - return KgtkFormat.additional_edge_columns(self.column_names) + return KgtkBase.additional_edge_columns(self.column_names) elif self.is_node_file: - return KgtkFormat.additional_node_columns(self.column_names) + return KgtkBase.additional_node_columns(self.column_names) else: # TODO: throw a better exception. raise ValueError("KgtkReader: Unknown Kgtk file type.") diff --git a/kgtk/join/kgtkwriter.py b/kgtk/join/kgtkwriter.py index 20c55ff4b..2aac2f7e1 100644 --- a/kgtk/join/kgtkwriter.py +++ b/kgtk/join/kgtkwriter.py @@ -18,11 +18,12 @@ from kgtk.join.kgtkreader import KgtkReader from kgtk.join.enumnameaction import EnumNameAction from kgtk.join.gzipprocess import GzipProcess +from kgtk.join.kgtkbase import KgtkBase from kgtk.join.kgtkformat import KgtkFormat from kgtk.join.validationaction import ValidationAction @attr.s(slots=True, frozen=False) -class KgtkWriter(KgtkFormat): +class KgtkWriter(KgtkBase): GZIP_QUEUE_SIZE_DEFAULT: int = GzipProcess.GZIP_QUEUE_SIZE_DEFAULT file_path: typing.Optional[Path] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(Path))) From c8c5602933429e25289b5b0e8b13042bdff9e9ad Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Thu, 30 Apr 2020 14:20:37 -0700 Subject: [PATCH 032/278] Better routing of feedback and error messages. 
--- kgtk/cli/validate.py | 10 +++++----- kgtk/join/edgereader.py | 2 ++ kgtk/join/kgtkbase.py | 2 +- kgtk/join/kgtkreader.py | 42 ++++++++++++++++++++++------------------- kgtk/join/nodereader.py | 2 ++ 5 files changed, 33 insertions(+), 25 deletions(-) diff --git a/kgtk/cli/validate.py b/kgtk/cli/validate.py index 13c91484f..9d97293e9 100644 --- a/kgtk/cli/validate.py +++ b/kgtk/cli/validate.py @@ -165,11 +165,11 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], kgtk_file: typing.Optional[Path] for kgtk_file in kgtk_files: if verbose: - print("\n====================================================") + print("\n====================================================", flush=True) if kgtk_file is not None: - print("Validating '%s'" % str(kgtk_file), file=error_file) + print("Validating '%s'" % str(kgtk_file), file=error_file, flush=True) else: - print ("Validating from stdin", file=error_file) + print ("Validating from stdin", file=error_file, flush=True) kr: KgtkReader = KgtkReader.open(kgtk_file, force_column_names=force_column_names, @@ -200,14 +200,14 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], if header_only: kr.close() if verbose: - print("Validated the header only.", file=error_file) + print("Validated the header only.", file=error_file, flush=True) else: line_count: int = 0 row: typing.List[str] for row in kr: line_count += 1 if verbose: - print("Validated %d data lines" % line_count, file=error_file) + print("Validated %d data lines" % line_count, file=error_file, flush=True) return 0 except SystemExit as e: diff --git a/kgtk/join/edgereader.py b/kgtk/join/edgereader.py index b5ff87ff5..b15c464dc 100644 --- a/kgtk/join/edgereader.py +++ b/kgtk/join/edgereader.py @@ -48,6 +48,7 @@ def open_edge_file(cls, compression_type=compression_type, gzip_in_parallel=gzip_in_parallel, gzip_queue_size=gzip_queue_size, + error_file=error_file, verbose=verbose) # Read the edge file header and split it into column names. @@ -57,6 +58,7 @@ def open_edge_file(cls, force_column_names=force_column_names, skip_first_record=skip_first_record, column_separator=column_separator, + error_file=error_file, verbose=verbose) # Check for unsafe column names. diff --git a/kgtk/join/kgtkbase.py b/kgtk/join/kgtkbase.py index 9aaab4e11..7f6ed4afd 100644 --- a/kgtk/join/kgtkbase.py +++ b/kgtk/join/kgtkbase.py @@ -27,7 +27,7 @@ def _yelp(cls, raise ValueError("In input header'%s': %s" % (header_line, msg)) if (error_action in [ValidationAction.REPORT, ValidationAction.COMPLAIN, ValidationAction.EXIT ]): - print("In input header '%s': %s" % (header_line, msg), file=error_file) + print("In input header '%s': %s" % (header_line, msg), file=error_file, flush=True) if error_action == ValidationAction.EXIT: sys.exit(1) return error_action in [ValidationAction.PASS, ValidationAction.REPORT] diff --git a/kgtk/join/kgtkreader.py b/kgtk/join/kgtkreader.py index 0ee76ef03..54dfed634 100644 --- a/kgtk/join/kgtkreader.py +++ b/kgtk/join/kgtkreader.py @@ -144,6 +144,7 @@ def open(cls, compression_type=compression_type, gzip_in_parallel=gzip_in_parallel, gzip_queue_size=gzip_queue_size, + error_file=error_file, verbose=verbose) # Read the kgtk file header and split it into column names. We get the @@ -154,6 +155,7 @@ def open(cls, force_column_names=force_column_names, skip_first_record=skip_first_record, column_separator=column_separator, + error_file=error_file, verbose=verbose) # Check for unsafe column names. 
cls.check_column_names(column_names, @@ -182,12 +184,12 @@ def open(cls, is_edge_file = True is_node_file = False if verbose: - print("%s column found, this is a KGTK edge file" % column_names[node1_idx], file=error_file) + print("%s column found, this is a KGTK edge file" % column_names[node1_idx], file=error_file, flush=True) else: is_edge_file = False is_node_file = True if verbose: - print("node1 column not found, assuming this is a KGTK node file", file=error_file) + print("node1 column not found, assuming this is a KGTK node file", file=error_file, flush=True) elif mode is KgtkReader.Mode.EDGE: is_edge_file = True @@ -211,7 +213,7 @@ def open(cls, error_file=error_file) if verbose: - print("KgtkReader: Reading an edge file. node1=%d label=%d node2=%d" % (node1_column_idx, label_column_idx, node2_column_idx), file=error_file) + print("KgtkReader: Reading an edge file. node1=%d label=%d node2=%d" % (node1_column_idx, label_column_idx, node2_column_idx), file=error_file, flush=True) # Apply the proper defaults to the blank node1, node2, and id actions: if blank_node1_line_action is None: @@ -267,7 +269,7 @@ def open(cls, error_file=error_file) if verbose: - print("KgtkReader: Reading an node file. id=%d" % (id_column_idx), file=error_file) + print("KgtkReader: Reading an node file. id=%d" % (id_column_idx), file=error_file, flush=True) # Apply the proper defaults to the blank node1, node2, and id actions: if blank_node1_line_action is None: @@ -356,27 +358,28 @@ def _open_compressed_file(cls, file_name: str, file_or_path: typing.Union[Path, typing.TextIO], who: str, + error_file: typing.TextIO, verbose: bool)->typing.TextIO: # TODO: find a better way to coerce typing.IO[Any] to typing.TextIO if compression_type in [".gz", "gz"]: if verbose: - print("%s: reading gzip %s" % (who, file_name)) + print("%s: reading gzip %s" % (who, file_name), file=error_file, flush=True) return gzip.open(file_or_path, mode="rt") # type: ignore elif compression_type in [".bz2", "bz2"]: if verbose: - print("%s: reading bz2 %s" % (who, file_name)) + print("%s: reading bz2 %s" % (who, file_name), file=error_file, flush=True) return bz2.open(file_or_path, mode="rt") # type: ignore elif compression_type in [".xz", "xz"]: if verbose: - print("%s: reading lzma %s" % (who, file_name)) + print("%s: reading lzma %s" % (who, file_name), file=error_file, flush=True) return lzma.open(file_or_path, mode="rt") # type: ignore elif compression_type in [".lz4", "lz4"]: if verbose: - print("%s: reading lz4 %s" % (who, file_name)) + print("%s: reading lz4 %s" % (who, file_name), file=error_file, flush=True) return lz4.frame.open(file_or_path, mode="rt") # type: ignore else: # TODO: throw a better exception. 
@@ -387,24 +390,25 @@ def _openfile(cls, file_path: typing.Optional[Path], compression_type: typing.Optional[str], gzip_in_parallel: bool, gzip_queue_size: int, + error_file: typing.TextIO, verbose: bool)->ClosableIter[str]: who: str = cls.__name__ if file_path is None or str(file_path) == "-": if compression_type is not None and len(compression_type) > 0: - return ClosableIterTextIOWrapper(cls._open_compressed_file(compression_type, "-", sys.stdin, who, verbose)) + return ClosableIterTextIOWrapper(cls._open_compressed_file(compression_type, "-", sys.stdin, who, error_file, verbose)) else: if verbose: - print("%s: reading stdin" % who) + print("%s: reading stdin" % who, file=error_file, flush=True) return ClosableIterTextIOWrapper(sys.stdin) if verbose: - print("%s: File_path.suffix: %s" % (who, file_path.suffix)) + print("%s: File_path.suffix: %s" % (who, file_path.suffix), file=error_file, flush=True) gzip_file: typing.TextIO if compression_type is not None and len(compression_type) > 0: - gzip_file = cls._open_compressed_file(compression_type, str(file_path), file_path, who, verbose) + gzip_file = cls._open_compressed_file(compression_type, str(file_path), file_path, who, error_file, verbose) elif file_path.suffix in [".bz2", ".gz", ".lz4", ".xz"]: - gzip_file = cls._open_compressed_file(file_path.suffix, str(file_path), file_path, who, verbose) + gzip_file = cls._open_compressed_file(file_path.suffix, str(file_path), file_path, who, error_file, verbose) else: if verbose: print("%s: reading file %s" % (who, str(file_path))) @@ -424,6 +428,7 @@ def _build_column_names(cls, force_column_names: typing.Optional[typing.List[str]], skip_first_record: bool, column_separator: str, + error_file: typing.TextIO, verbose: bool = False, )->typing.Tuple[str, typing.List[str]]: """ @@ -436,8 +441,7 @@ def _build_column_names(cls, # TODO: if the read fails, throw a more useful exception with the line number. header: str = next(source).rstrip("\r\n") if verbose: - print("header: %s" % header) - + print("header: %s" % header, file=error_file, flush=True) # Split the first line into column names. return header, header.split(column_separator) @@ -469,10 +473,10 @@ def exclude_line(self, action: ValidationAction, msg: str, line: str)->bool: # Immediately raise an exception. raise ValueError("In input data line %d, %s: %s" % (self.data_lines_read, msg, line)) elif action == ValidationAction.EXIT: - print("In input data line %d, %s: %s" % (self.data_lines_read, msg, line), file=self.error_file) + print("In input data line %d, %s: %s" % (self.data_lines_read, msg, line), file=self.error_file, flush=True) sys.exit(1) - print("In input data line %d, %s: %s" % (self.data_lines_read, msg, line), file=self.error_file) + print("In input data line %d, %s: %s" % (self.data_lines_read, msg, line), file=self.error_file, flush=True) self.data_errors_reported += 1 if self.error_limit > 0 and self.data_errors_reported >= self.error_limit: raise ValueError("Too many data errors.") @@ -507,7 +511,7 @@ def __next__(self)-> typing.List[str]: line = line.rstrip("\r\n") if self.very_verbose: - print("'%s'" % line) + print("'%s'" % line, file=self.error_file, flush=True) # Ignore empty lines. 
         if self.empty_line_action != ValidationAction.PASS and len(line) == 0:
@@ -755,7 +759,7 @@ def main():
     row: typing.List[str]
     for row in kr:
         line_count += 1
-    print("Read %d lines" % line_count)
+    print("Read %d lines" % line_count, file=error_file, flush=True)

 if __name__ == "__main__":
     main()

diff --git a/kgtk/join/nodereader.py b/kgtk/join/nodereader.py
index a6fdcae07..4b3de9587 100644
--- a/kgtk/join/nodereader.py
+++ b/kgtk/join/nodereader.py
@@ -47,6 +47,7 @@ def open_node_file(cls,
                                    compression_type=compression_type,
                                    gzip_in_parallel=gzip_in_parallel,
                                    gzip_queue_size=gzip_queue_size,
+                                   error_file=error_file,
                                    verbose=verbose)

         # Read the node file header and split it into column names.
@@ -56,6 +57,7 @@ def open_node_file(cls,
                                    force_column_names=force_column_names,
                                    skip_first_record=skip_first_record,
                                    column_separator=column_separator,
+                                   error_file=error_file,
                                    verbose=verbose)

         # Check for unsafe column names.
         cls.check_column_names(column_names,

From 1673789a7c38823cee9af5d5dd09c5da9dc77081 Mon Sep 17 00:00:00 2001
From: Filip Ilievski <6437407+filievski@users.noreply.github.com>
Date: Thu, 30 Apr 2020 14:53:40 -0700
Subject: [PATCH 033/278] Update README.md

---
 README.md | 46 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 41 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index f386ae6d5..40ba1e138 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,27 @@
-# kgtk
+# kgtk: Knowledge Graph Toolkit
+
+KGTK is a Python library for easy manipulation of knowledge graphs. It provides a flexible framework that allows chaining of common graph operations, such as: extraction of subgraphs, filtering, computation of graph metrics, validation, cleaning, generating embeddings, and so on. Its principal format is TSV, though we do support a number of other inputs.
+
+### Documentation
+
+To-do.
+
+### Features
+
+* Computation of class instances
+* Computation of reachable nodes
+* Filtering based on property values
+* Removal of columns
+* Sorting
+* Computation of various embeddings
+* Cleaning and validation
+* Computation of graph metrics
+* Joining and concatenation of graphs
+* Manipulation of Wikidata data
+
+### Releases
+
+* [Source code](https://github.com/usc-isi-i2/kgtk/releases)

 ### Installation

@@ -10,7 +33,7 @@ conda activate kgtk-env
 ```
 **Note:** Installing Graph-tool is problematic on python 3.8 and out of a virtual environment. Thus: **the advised installation path is by using a virtual environment.**

-2. Install (the dev branch at this point): `pip install git+https://github.com/usc-isi-i2/kgtk.git@dev`
+2. Install (the dev branch at this point): `pip install kgtk`

 You can test if `kgtk` is installed properly now with: `kgtk -h`.

@@ -25,7 +48,20 @@ You can test if `kgtk` is installed properly now with: `kgtk -h`.

 More installation options for `mlr` can be found [here](https://johnkerl.org/miller/doc/build.html).

-### The Miller Package
+### Running KGTK commands
+
+To list all the available KGTK commands, run:
+`kgtk -h`
+
+To see the arguments of a particular command, run:
+`kgtk -h`
+
+An example command that computes instances of the subclasses of two classes:
+`kgtk instances --transitive --class Q13442814,Q12345678`
+
+### Additional information
+
+#### The Miller Package

 1. Our code uses the "miller" package to manipulate formatted data.

@@ -40,7 +76,7 @@ https://www.mankier.com/1/mlr

 4. You may need to install the miller command (mlr) on your system.
 * OpenSUSE Tumbleweed Linux: install package `miller` from Main Repository (OSS)

-### List of supported tools
+#### List of supported tools
 * `instances`
 * `reachable_nodes`
 * `filter`
@@ -57,6 +93,6 @@ To get information on how to use each of them, run:

 More detailed description of the arguments will be added here promptly.

-### Developer Instruction
+#### Developer Instruction

 Please refer to [this](README_dev.md)

From 995792fdf15dea8b4190da341d8212c3390a0743 Mon Sep 17 00:00:00 2001
From: Filip Ilievski <6437407+filievski@users.noreply.github.com>
Date: Thu, 30 Apr 2020 14:53:56 -0700
Subject: [PATCH 034/278] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 40ba1e138..fd2e15a5d 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# kgtk: Knowledge Graph Toolkit
+# KGTK: Knowledge Graph Toolkit

 KGTK is a Python library for easy manipulation of knowledge graphs. It provides a flexible framework that allows chaining of common graph operations, such as: extraction of subgraphs, filtering, computation of graph metrics, validation, cleaning, generating embeddings, and so on. Its principal format is TSV, though we do support a number of other inputs.

From d1cb0bb62e6e7639863cac330a859dd5cda5cf2c Mon Sep 17 00:00:00 2001
From: Filip Ilievski <6437407+filievski@users.noreply.github.com>
Date: Thu, 30 Apr 2020 14:54:35 -0700
Subject: [PATCH 035/278] Update README.md

---
 README.md | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index fd2e15a5d..1d9247fba 100644
--- a/README.md
+++ b/README.md
@@ -2,11 +2,11 @@

 KGTK is a Python library for easy manipulation of knowledge graphs. It provides a flexible framework that allows chaining of common graph operations, such as: extraction of subgraphs, filtering, computation of graph metrics, validation, cleaning, generating embeddings, and so on. Its principal format is TSV, though we do support a number of other inputs.

-### Documentation
+## Documentation

 To-do.

-### Features
+## Features

 * Computation of class instances
 * Computation of reachable nodes
@@ -19,11 +19,11 @@ To-do.
 * Joining and concatenation of graphs
 * Manipulation of Wikidata data

-### Releases
+## Releases

 * [Source code](https://github.com/usc-isi-i2/kgtk/releases)

-### Installation
+## Installation

 0. Our installations will be in a conda environment. If you don't have a conda installed, follow [link](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) to install it.
 1. Set up your own conda environment:
@@ -48,7 +48,7 @@ You can test if `kgtk` is installed properly now with: `kgtk -h`.

 More installation options for `mlr` can be found [here](https://johnkerl.org/miller/doc/build.html).

-### Running KGTK commands
+## Running KGTK commands

 To list all the available KGTK commands, run:
 `kgtk -h`

@@ -59,9 +59,9 @@ To see the arguments of a particular command, run:
 `kgtk -h`

 An example command that computes instances of the subclasses of two classes:
 `kgtk instances --transitive --class Q13442814,Q12345678`

-### Additional information
+## Additional information

-#### The Miller Package
+### The Miller Package

 1. Our code uses the "miller" package to manipulate formatted data.

@@ -76,7 +76,7 @@ https://www.mankier.com/1/mlr

 4. You may need to install the miller command (mlr) on your system.
 * OpenSUSE Tumbleweed Linux: install package `miller` from Main Repository (OSS)

-#### List of supported tools
+### List of supported tools
 * `instances`
 * `reachable_nodes`
 * `filter`
@@ -93,6 +93,6 @@ To get information on how to use each of them, run:

 More detailed description of the arguments will be added here promptly.

-#### Developer Instruction
+### Developer Instructions

 Please refer to [this](README_dev.md)

From 17b311938728de5ca14da5b8443da08f218bfc47 Mon Sep 17 00:00:00 2001
From: Filip Ilievski <6437407+filievski@users.noreply.github.com>
Date: Thu, 30 Apr 2020 14:55:37 -0700
Subject: [PATCH 036/278] Update README.md

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index 1d9247fba..4110d54ac 100644
--- a/README.md
+++ b/README.md
@@ -51,12 +51,15 @@ More installation options for `mlr` can be found [here](https://johnkerl.org/mil
 ## Running KGTK commands

 To list all the available KGTK commands, run:
+
 `kgtk -h`

 To see the arguments of a particular command, run:
+
 `kgtk -h`

 An example command that computes instances of the subclasses of two classes:
+
 `kgtk instances --transitive --class Q13442814,Q12345678`

From 5c3d6ab9a748724c5117c0621d12cc27635441da Mon Sep 17 00:00:00 2001
From: ckxz105
Date: Thu, 30 Apr 2020 15:50:01 -0700
Subject: [PATCH 037/278] temporary push for text-embedding, bug fix
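read_input groups edges by Q node, but when the first edge of a new node
arrived, the sentence assembled for the previous node was stored under the
new node's id. The fix keys the stored attributes by the node that was just
finished. A minimal sketch of the corrected flow (condensed and simplified
from the hunk below; the surrounding parsing code is omitted):

    if current_process_node_id != node_id:
        if current_process_node_id is not None:
            # Finish the node we were accumulating, keyed by its own id,
            # not by the id of the node that starts on this line.
            sentence = self.attribute_to_sentence(each_node_attributes, current_process_node_id)
            each_node_attributes["sentence"] = sentence
            self.candidates[current_process_node_id] = each_node_attributes
            # Reset the accumulator, then switch to the new node.
            each_node_attributes = {"has_properties": [], "isa_properties": [],
                                    "label_properties": [], "description_properties": []}
        current_process_node_id = node_id

The logging setup in main() is also collapsed: any logging level other than
"none" now enables DEBUG logging to a time-stamped file under the user's
home directory.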
---
 kgtk/cli/text_embedding.py | 32 +++++++++++++-------------------
 1 file changed, 13 insertions(+), 19 deletions(-)

diff --git a/kgtk/cli/text_embedding.py b/kgtk/cli/text_embedding.py
index 3a439c8ce..420f4f9e0 100644
--- a/kgtk/cli/text_embedding.py
+++ b/kgtk/cli/text_embedding.py
@@ -384,12 +384,12 @@ def read_input(self, file_path: str, skip_nodes_set: set=None,
                 else:
                     # if we get to next id
                     # concate all properties into one sentence to represent the Q node
-                    concated_sentence = self.attribute_to_sentence(each_node_attributes, node_id)
+                    concated_sentence = self.attribute_to_sentence(each_node_attributes, current_process_node_id)
                     each_node_attributes["sentence"] = concated_sentence
-                    self.candidates[node_id] = each_node_attributes
-                    self._logger.debug("{} --> {}".format(node_id, concated_sentence))
+                    self.candidates[current_process_node_id] = each_node_attributes
                     # after write down finish, we can clear and start parsing next one
                     each_node_attributes = {"has_properties":[], "isa_properties":[], "label_properties":[], "description_properties": []}
+                    # update to new id
                     current_process_node_id = node_id

                 if node_property in target_properties:
@@ -725,24 +725,16 @@ def main(**kwargs):
     import argparse
     import pickle

-    logging_level = kwargs.get("logging_level", "warning")
-    if logging_level == "info":
-        logging_level_class = logging.INFO
-    elif logging_level == "debug":
+    do_logging = kwargs.get("logging_level", None)
+    if do_logging and do_logging.lower() != "none":
         logging_level_class = logging.DEBUG
-    elif logging_level == "warning":
-        logging_level_class = logging.WARNING
-    elif logging_level == "error":
-        logging_level_class = logging.ERROR
-    else:
-        logging_level_class = logging.WARNING
-    if logging_level != "none":
         logger_path = os.path.join(os.environ.get("HOME"), "kgtk_text_embedding_log_{}.log".format(strftime("%Y-%m-%d-%H-%M")))
         logging.basicConfig(level=logging_level_class,
-                        format="%(asctime)s [%(levelname)s] %(name)s %(lineno)d -- %(message)s",
-                        datefmt='%m-%d %H:%M:%S',
-                        filename=logger_path,
-                        filemode='w')
+                            format="%(asctime)s [%(levelname)s] %(name)s %(lineno)d -- %(message)s",
+                            datefmt='%m-%d %H:%M:%S',
+                            filename=logger_path,
+                            filemode='w')
+
     _logger = logging.getLogger(__name__)
     _logger.warning("Running with logging level {}".format(_logger.getEffectiveLevel()))

@@ -832,10 +824,12 @@ def str2bool(v):
             return False
         else:
             raise argparse.ArgumentTypeError('Boolean value expected.')
-    # logging level
+    # logging level, no longer needed as there is a global choice for it
     parser.add_argument('-l', '--logging-level', action='store', dest='logging_level',
                         default="info", choices=("error", "warning", "info", "debug", "none"),
                         help="set up the logging level, default is INFO level")
+    # parser.add_argument('--debug', action='store_true', dest='logging_level',
+    #                     help='set up to make logging and store at home directory.')
     # model name
     all_models_names = ALL_EMBEDDING_MODELS_NAMES
     parser.add_argument('-m', '--model', action='store', nargs='+', dest='all_models_names',

From a74599f6c116e78326e7af32000e55727d345046 Mon Sep 17 00:00:00 2001
From: Craig Milo Rogers
Date: Thu, 30 Apr 2020 16:56:43 -0700
Subject: [PATCH 038/278] Optimize for the case of empty columns.

---
 kgtk/join/kgtkreader.py | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/kgtk/join/kgtkreader.py b/kgtk/join/kgtkreader.py
index 54dfed634..d6a30a209 100644
--- a/kgtk/join/kgtkreader.py
+++ b/kgtk/join/kgtkreader.py
@@ -574,19 +574,28 @@ def __next__(self)-> typing.List[str]:
         return values

     def _ignore_invalid_values(self, values: typing.List[str], line: str)->bool:
+        """Given a row of values, validate each value. If we find one or more
+        validation problems, we might want to emit error messages and we might
+        want to ignore the entire row.
+
+        Returns True to indicate that the row should be ignored (skipped).
+
+        """
+        problems: typing.List[str] = [ ] # Build a list of problems.
+        idx: int
         value: str
-        idx: int = 0
-        problems: typing.List[str] = [ ]
-        for value in values:
-            kv: KgtkValue = KgtkValue(value)
-            if not kv.is_valid():
-                problems.append("%s: %s" % (self.column_names[idx], kv.describe()))
-            idx += 1
-        if len(problems) > 0 and self.exclude_line(self.invalid_value_action,
-                                                   "; ".join(problems),
-                                                   line):
-            return True
-        return False
+        for idx, value in enumerate(values):
+            if len(value) > 0: # Optimize the common case of empty columns.
+ kv: KgtkValue = KgtkValue(value) + if not kv.is_valid(): + problems.append("%s: %s" % (self.column_names[idx], kv.describe())) + + if len(problems) == 0: + return False + + return self.exclude_line(self.invalid_value_action, + "; ".join(problems), + line) # May be overridden def _ignore_if_blank_fields(self, values: typing.List[str], line: str)->bool: From 92e36aaab2a38f92ccc865984d5680effeb40594 Mon Sep 17 00:00:00 2001 From: ckxz105 Date: Thu, 30 Apr 2020 18:06:06 -0700 Subject: [PATCH 039/278] embedding vector: code clean / bug fix / use --debug for debuging setting --- kgtk/cli/text_embedding.py | 333 +++++++++++++++++-------------------- 1 file changed, 152 insertions(+), 181 deletions(-) diff --git a/kgtk/cli/text_embedding.py b/kgtk/cli/text_embedding.py index 420f4f9e0..77004a2d9 100644 --- a/kgtk/cli/text_embedding.py +++ b/kgtk/cli/text_embedding.py @@ -1,30 +1,29 @@ -import sys import typing from kgtk.exceptions import KGTKException ALL_EMBEDDING_MODELS_NAMES = [ -"bert-base-nli-cls-token", -"bert-base-nli-max-tokens", -"bert-base-nli-mean-tokens", -"bert-base-nli-stsb-mean-tokens", -"bert-base-wikipedia-sections-mean-tokens", -"bert-large-nli-cls-token", -"bert-large-nli-max-tokens", -"bert-large-nli-mean-tokens", -"bert-large-nli-stsb-mean-tokens", -"distilbert-base-nli-mean-tokens", -"distilbert-base-nli-stsb-mean-tokens", -"distiluse-base-multilingual-cased", -"roberta-base-nli-mean-tokens", -"roberta-base-nli-stsb-mean-tokens", -"roberta-large-nli-mean-tokens", -"roberta-large-nli-stsb-mean-tokens" + "bert-base-nli-cls-token", + "bert-base-nli-max-tokens", + "bert-base-nli-mean-tokens", + "bert-base-nli-stsb-mean-tokens", + "bert-base-wikipedia-sections-mean-tokens", + "bert-large-nli-cls-token", + "bert-large-nli-max-tokens", + "bert-large-nli-mean-tokens", + "bert-large-nli-stsb-mean-tokens", + "distilbert-base-nli-mean-tokens", + "distilbert-base-nli-stsb-mean-tokens", + "distiluse-base-multilingual-cased", + "roberta-base-nli-mean-tokens", + "roberta-base-nli-stsb-mean-tokens", + "roberta-large-nli-mean-tokens", + "roberta-large-nli-stsb-mean-tokens" ] class EmbeddingVector: - def __init__(self, model_name=None, query_server=None, cache_config:dict={}): - from sentence_transformers import SentenceTransformer, SentencesDataset, LoggingHandler, losses, models # type: ignore + def __init__(self, model_name=None, query_server=None, cache_config: dict = {}): + from sentence_transformers import SentenceTransformer, SentencesDataset, LoggingHandler, losses, models # type: ignore import logging import re self._logger = logging.getLogger(__name__) @@ -65,43 +64,21 @@ def __init__(self, model_name=None, query_server=None, cache_config:dict={}): self.redis_server = None self.qnodes_descriptions = dict() self.vectors_map = dict() + self.property_labels_dict = dict() self.vectors_2D = None self.gt_nodes = set() self.candidates = defaultdict(dict) - self.embedding_cache = dict() self.vector_dump_file = None self.q_node_to_label = dict() self.metadata = [] self.gt_indexes = set() self.input_format = "" - self.token_patern = re.compile(r"(?u)\b\w\w+\b") - - @staticmethod - def minDistance(word1, word2): - """Dynamic programming solution""" - m = len(word1) - n = len(word2) - table = [[0] * (n + 1) for _ in range(m + 1)] - for i in range(m + 1): - table[i][0] = i - for j in range(n + 1): - table[0][j] = j - for i in range(1, m + 1): - for j in range(1, n + 1): - if word1[i - 1] == word2[j - 1]: - table[i][j] = table[i - 1][j - 1] - else: - table[i][j] = 1 + min(table[i - 1][j], 
table[i][j - 1], table[i - 1][j - 1]) - return table[-1][-1] - + self.token_pattern = re.compile(r"(?u)\b\w\w+\b") def get_sentences_embedding(self, sentences: typing.List[str], qnodes: typing.List[str]): """ transform a list of sentences to embedding vectors """ - # if sentences in self.embedding_cache: - # return self.embedding_cache[sentences] - # else: from ast import literal_eval if self.redis_server is not None: sentence_embeddings = [] @@ -119,14 +96,13 @@ def get_sentences_embedding(self, sentences: typing.List[str], qnodes: typing.Li self.redis_server.set(query_cache_key, str(each_embedding[0].tolist())) else: sentence_embeddings = self.model.encode(sentences, show_progress_bar=False) - # self.embedding_cache[sentences] = sentence_embeddings return sentence_embeddings - def send_sparql_query(self, query_body:str): + def send_sparql_query(self, query_body: str): """ a simple wrap to send the query and return the returned results """ - from SPARQLWrapper import SPARQLWrapper, JSON, POST, URLENCODED # type: ignore + from SPARQLWrapper import SPARQLWrapper, JSON, POST, URLENCODED # type: ignore qm = SPARQLWrapper(self.wikidata_server) qm.setReturnFormat(JSON) qm.setMethod(POST) @@ -138,9 +114,9 @@ def send_sparql_query(self, query_body:str): results = qm.query().convert()['results']['bindings'] return results except: - raise KGTKException("Sending Sparl query to {} failed!".format(self.wikidata_server)) + raise KGTKException("Sending Sparql query to {} failed!".format(self.wikidata_server)) - def get_item_description(self, qnodes: typing.List[str]=None, target_properties:dict={}, gt_label:str=""): + def get_item_description(self, qnodes: typing.List[str] = None, target_properties: dict = {}): """ use sparql query to get the descriptions of given Q nodes """ @@ -166,7 +142,7 @@ def get_item_description(self, qnodes: typing.List[str]=None, target_properties: sentences_cache_dict = {} if self.redis_server is not None: for each_node in qnodes: - cache_res = self.redis_server.get(each_node+str(properties_list)) + cache_res = self.redis_server.get(each_node + str(properties_list)) if cache_res is not None: sentences_cache_dict[each_node] = cache_res.decode("utf-8") @@ -199,12 +175,6 @@ def get_item_description(self, qnodes: typing.List[str]=None, target_properties: description = "" if "itemLabel" in each: label = each['itemLabel']['value'] - # if each_node == self.gt[gt_label]: - # if self.minDistance(label, gt_label) > len(gt_label): - # a = "".join(self.token_patern.findall(label.lower())) - # b = "".join(self.token_patern.findall(gt_label.lower())) - # if a not in b and b not in a: - # self._logger.error("{} with {} --> {} edit distance too larger!!!".format(each_node, label, gt_label)) else: label = "" if need_find_label: @@ -223,7 +193,7 @@ def get_item_description(self, qnodes: typing.List[str]=None, target_properties: part2 += """?item wdt:{} ?{}_{}. \n""".format(each, name, i) query_body2 += """ where { - values ?item {""" + query_qnodes + "}" + values ?item {""" + query_qnodes + "}" query_body2 += part2 + """ SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } @@ -246,7 +216,7 @@ def get_item_description(self, qnodes: typing.List[str]=None, target_properties: query_body3 = """ select DISTINCT ?item ?p_entity ?p_entityLabel where { - values ?item {"""+ query_qnodes + """} + values ?item {""" + query_qnodes + """} ?item ?p ?o. 
FILTER regex(str(?p), "^http://www.wikidata.org/prop/P", "i") BIND (IRI(REPLACE(STR(?p), "http://www.wikidata.org/prop", "http://www.wikidata.org/entity")) AS ?p_entity) . @@ -262,27 +232,26 @@ def get_item_description(self, qnodes: typing.List[str]=None, target_properties: if "has_properties" in self.candidates[node_name]: self.candidates[node_name]["has_properties"].add(p_node_label) else: - self.candidates[node_name]["has_properties"] = set([p_node_label]) + self.candidates[node_name]["has_properties"] = {p_node_label} for each_node_id in qnodes: each_sentence = self.attribute_to_sentence(self.candidates[each_node_id], each_node_id) self.candidates[each_node_id]["sentence"] = each_sentence if self.redis_server is not None: - self.redis_server.set(each_node+str(properties_list), each_sentence) - + self.redis_server.set(each_node_id + str(properties_list), each_sentence) + for each_node_id, sentence in sentences_cache_dict.items(): self.candidates[each_node_id]["sentence"] = sentence - - def read_input(self, file_path: str, skip_nodes_set: set=None, - input_format: str="kgtk_format",target_properties: dict={}, - property_labels_dict:dict={}, black_list_set:set=set() + def read_input(self, file_path: str, skip_nodes_set: set = None, + input_format: str = "kgtk_format", target_properties: dict = {}, + property_labels_dict: dict = {}, black_list_set: set = set() ): """ load the input candidates files """ from collections import defaultdict - import pandas as pd # type: ignore + import pandas as pd # type: ignore import numpy as np import math @@ -306,7 +275,7 @@ def read_input(self, file_path: str, skip_nodes_set: set=None, temp = str(each['candidates']).split("|") elif each['candidates'] is np.nan or math.isnan(each['candidates']): temp = [] - + to_remove_q = set() if each[gt_column_id] is np.nan: self._logger.warning("Ignore NaN gt value form {}".format(str(each))) @@ -345,22 +314,25 @@ def read_input(self, file_path: str, skip_nodes_set: set=None, # get header headers = f.readline().replace("\n", "").split("\t") if len(headers) < 3: - raise KGTKException("No enough columns found on given input file. Only {} columns given but at least 3 needed.".format(len(headers))) + raise KGTKException( + "No enough columns found on given input file. 
Only {} columns given but at least 3 needed.".format( + len(headers))) elif "node" in headers and "property" in headers and "value" in headers: - column_references = {"node": headers.index("node"), + column_references = {"node": headers.index("node"), "property": headers.index("property"), "value": headers.index("value")} elif len(headers) == 3: - column_references = {"node": 0, + column_references = {"node": 0, "property": 1, "value": 2} else: - missing_column = set(["node", "property", "value"]) - set(headers) + missing_column = {"node", "property", "value"} - set(headers) raise KGTKException("Missing column {}".format(missing_column)) self._logger.debug("column index information: ") self._logger.debug(str(column_references)) # read contents - each_node_attributes = {"has_properties":[], "isa_properties":[], "label_properties":[], "description_properties": []} + each_node_attributes = {"has_properties": [], "isa_properties": [], "label_properties": [], + "description_properties": []} current_process_node_id = None for each_line in f: each_line = each_line.replace("\n", "").split("\t") @@ -373,9 +345,9 @@ def read_input(self, file_path: str, skip_nodes_set: set=None, node_value = node_value[:node_value.index("@")] # remove extra double quote " and single quote ' - if node_value[0]== '"' and node_value[-1] == '"': + if node_value[0] == '"' and node_value[-1] == '"': node_value = node_value[1:-1] - if node_value[0]== "'" and node_value[-1] == "'": + if node_value[0] == "'" and node_value[-1] == "'": node_value = node_value[1:-1] if current_process_node_id != node_id: @@ -388,7 +360,8 @@ def read_input(self, file_path: str, skip_nodes_set: set=None, each_node_attributes["sentence"] = concated_sentence self.candidates[current_process_node_id] = each_node_attributes # after write down finish, we can cleaer and start parsing next one - each_node_attributes = {"has_properties":[], "isa_properties":[], "label_properties":[], "description_properties": []} + each_node_attributes = {"has_properties": [], "isa_properties": [], "label_properties": [], + "description_properties": []} # update to new id current_process_node_id = node_id @@ -396,12 +369,12 @@ def read_input(self, file_path: str, skip_nodes_set: set=None, each_node_attributes[target_properties[node_property]].append(node_value) if add_all_properties and each_line[column_references["value"]][0] == "P": each_node_attributes["has_properties"].append(node_value) - + else: raise KGTKException("Unkonwn input format {}".format(input_format)) self._logger.info("Totally {} Q nodes loaded.".format(len(self.candidates))) - self.vector_dump_file = "dump_vectors_{}_{}.pkl".format(file_path[:file_path.rfind(".")], self. 
model_name) + self.vector_dump_file = "dump_vectors_{}_{}.pkl".format(file_path[:file_path.rfind(".")], self.model_name) # self._logger.debug("The cache file name will be {}".format(self.vector_dump_file)) def get_real_label_name(self, node): @@ -410,8 +383,9 @@ def get_real_label_name(self, node): else: return node - def attribute_to_sentence(self, v, node_id = None): + def attribute_to_sentence(self, v, node_id=None): concated_sentence = "" + have_isa_properties = False # sort the properties to ensure the sentence always same v = {key: sorted(list(value)) for key, value in v.items() if len(value) > 0} if "label_properties" in v and len(v["label_properties"]) > 0: @@ -421,39 +395,35 @@ def attribute_to_sentence(self, v, node_id = None): concated_sentence += ", " concated_sentence += self.get_real_label_name(v["description_properties"][0]) if "isa_properties" in v and len(v["isa_properties"]) > 0: + have_isa_properties = True temp = [self.get_real_label_name(each) for each in v["isa_properties"]] if concated_sentence != "" and temp[0] != "": - concated_sentence += " is a " + concated_sentence += " is a " elif temp[0] != "": concated_sentence += "It is a " concated_sentence += ", ".join(temp) if "has_properties" in v and len(v["has_properties"]) > 0: temp = [self.get_real_label_name(each) for each in v["has_properties"]] if concated_sentence != "" and temp[0] != "": - concated_sentence += ", and has " + if have_isa_properties: + concated_sentence += ", and has " + else: + concated_sentence += " has " elif temp[0] != "": concated_sentence += "It has " concated_sentence += " and ".join(temp) self._logger.debug("Transform node {} --> {}".format(node_id, concated_sentence)) return concated_sentence - def get_vetors(self, use_cache=True, vector_dump_file=None): + def get_vetors(self): """ main function to get the vector representations of the descriptions """ import os import time - from tqdm import tqdm # type: ignore - if vector_dump_file is None: - vector_dump_file = self.vector_dump_file - if use_cache and os.path.exists(vector_dump_file): - self._logger.info("Using cached vector file!") - self.load_vectors(vector_dump_file) - return - + from tqdm import tqdm # type: ignore + start_all = time.time() - jobs_count = 0 - counter = 0 self._logger.info("Now generating embedding vector.") for q_node, each_item in tqdm(self.candidates.items()): # do process for each row(one target) @@ -465,6 +435,7 @@ def get_vetors(self, use_cache=True, vector_dump_file=None): self._logger.info("Totally used {} seconds.".format(str(time.time() - start_all))) def dump_vectors(self, file_name, type_=None): + import pickle if file_name.endswith(".pkl"): file_name = file_name.replace(".pkl", "") if type_ == "2D": @@ -491,15 +462,7 @@ def dump_vectors(self, file_name, type_=None): _ = f.write(str(i) + "\t") _ = f.write("\n") - def load_vectors(self, file_name, type_=None): - if type_ == "2D": - with open(file_name, "rb") as f: - self.vectors_2D = pickle.load(f) - else: - with open(file_name, "rb") as f: - self.vectors_map = pickle.load(f) - - def print_vector(self, vectors, output_properties:str="text_embedding", output_format="kgtk_format"): + def print_vector(self, vectors, output_properties: str = "text_embedding", output_format="kgtk_format"): if output_format == "kgtk_format": print("node\tproperty\tvalue\n", end="") if self.input_format == "kgtk_format": @@ -530,10 +493,8 @@ def print_vector(self, vectors, output_properties:str="text_embedding", output_f else: print(str(each_dimension) + "\n", end="") - - def 
plot_result(self, use_cache=True, vector_dump_file=None, - output_properties={}, input_format="kgtk_format", - output_uri:str="", output_format="kgtk_format", + def plot_result(self, output_properties={}, input_format="kgtk_format", + output_uri: str = "", output_format="kgtk_format", run_TSNE=True ): """ @@ -542,7 +503,7 @@ def plot_result(self, use_cache=True, vector_dump_file=None, """ import os import time - from sklearn.manifold import TSNE # type: ignore + from sklearn.manifold import TSNE # type: ignore self.vectors_map = {k: v for k, v in sorted(self.vectors_map.items(), key=lambda item: item[0], reverse=True)} vectors = list(self.vectors_map.values()) @@ -551,7 +512,6 @@ def plot_result(self, use_cache=True, vector_dump_file=None, self._logger.warning("Start running TSNE to reduce dimension. It will take a long time.") start = time.time() self.vectors_2D = TSNE(n_components=2, random_state=0).fit_transform(vectors) - # self.dump_vectors(vector_dump_file, "2D") self._logger.info("Totally used {} seconds.".format(time.time() - start)) if input_format == "test_format": @@ -616,7 +576,7 @@ def evaluate_result(self): centroid += np.array(self.vectors_map[each]) gt_nodes_vectors.append(self.vectors_map[each]) centroid = centroid / len(points) - + distance_sum = 0 for each in gt_nodes_vectors: distance_sum += self.calculate_distance(each, centroid) @@ -627,8 +587,8 @@ def calculate_distance(a, b): if len(a) != len(b): raise KGTKException("Vector dimension are different!") dist = 0 - for v1, v2 in zip(a,b): - dist += (v1 - v2) **2 + for v1, v2 in zip(a, b): + dist += (v1 - v2) ** 2 dist = dist ** 0.5 return dist @@ -643,12 +603,14 @@ def load_property_labels_file(input_files: typing.List[str]): if headers is None: headers = each_line if len(headers) < 2: - raise KGTKException("No enough columns found on given input file. Only {} columns given but at least 2 needed.".format(len(headers))) + raise KGTKException( + "No enough columns found on given input file. 
Only {} columns given but at least 2 needed.".format( + len(headers))) elif "predicate" in headers and "label" in headers: - column_references = {"predicate": headers.index("predicate"), + column_references = {"predicate": headers.index("predicate"), "label": headers.index("label")} elif "label" in headers: - column_references = {"predicate": 0, + column_references = {"predicate": 0, "label": headers.index("label"), } else: @@ -669,19 +631,22 @@ def load_black_list_files(file_path): import tarfile import zipfile import gzip + import logging import re - token_patern = re.compile(r"(?u)\b\w\w+\b") + import numpy as np + token_pattern = re.compile(r"(?u)\b\w\w+\b") qnodes_set = set() + _logger = logging.getLogger(__name__) for each_file in file_path: try: # tar.gz file if each_file.endswith("tar.gz"): tar = tarfile.open("filename.tar.gz", "r:gz") for member in tar.getmembers(): - f = tar.extractfile(member) - if f: - content = f.read() - Data = np.loadtxt(content) + f = tar.extractfile(member) + if f: + content = f.read() + input_data = np.loadtxt(content) # gz file elif each_file.endswith(".gz"): with gzip.open('big_file.txt.gz', 'rb') as f: @@ -695,10 +660,9 @@ def load_black_list_files(file_path): with open(each_file, "r") as f: input_data = f.readlines() - for each in input_data: each = each.replace("\n", "") - for each_part in token_patern.findall(each): + for each_part in token_pattern.findall(each): if each_part[0] == "Q" and each_part[1:].isnumeric(): qnodes_set.add(each_part) except Exception as e: @@ -725,15 +689,16 @@ def main(**kwargs): import argparse import pickle - do_logging = kwargs.get("logging_level", None) - if do_logging and do_logging.lower() != "none": + do_logging = kwargs.get("_debug", False) + if do_logging: logging_level_class = logging.DEBUG - logger_path = os.path.join(os.environ.get("HOME"), "kgtk_text_embedding_log_{}.log".format(strftime("%Y-%m-%d-%H-%M"))) + logger_path = os.path.join(os.environ.get("HOME"), + "kgtk_text_embedding_log_{}.log".format(strftime("%Y-%m-%d-%H-%M"))) logging.basicConfig(level=logging_level_class, - format="%(asctime)s [%(levelname)s] %(name)s %(lineno)d -- %(message)s", - datefmt='%m-%d %H:%M:%S', - filename=logger_path, - filemode='w') + format="%(asctime)s [%(levelname)s] %(name)s %(lineno)d -- %(message)s", + datefmt='%m-%d %H:%M:%S', + filename=logger_path, + filemode='w') _logger = logging.getLogger(__name__) _logger.warning("Running with logging level {}".format(_logger.getEffectiveLevel())) @@ -748,21 +713,21 @@ def main(**kwargs): property_labels_files = kwargs.get("property_labels_file_uri", "") query_server = kwargs.get("query_server") properties = dict() - all_property_relate_inputs = [kwargs.get("label_properties", ["label"]), + all_property_relate_inputs = [kwargs.get("label_properties", ["label"]), kwargs.get("description_properties", ["description"]), kwargs.get("isa_properties", ["P31"]), kwargs.get("has_properties", ["all"]), - ] - all_required_properties = ["label_properties", "description_properties", + ] + all_required_properties = ["label_properties", "description_properties", "isa_properties", "has_properties"] - cache_config = {"use_cache": kwargs.get("use_cache", False), + cache_config = {"use_cache": kwargs.get("use_cache", False), "host": kwargs.get("cache_host", "dsbox01.isi.edu"), "port": kwargs.get("cache_port", 6379) } for each_property, each_input in zip(all_required_properties, all_property_relate_inputs): for each in each_input: properties[each] = each_property - + output_properties = { 
"metatada_properties": kwargs.get("metatada_properties", []), "output_properties": kwargs.get("output_properties", "text_embedding") @@ -778,7 +743,7 @@ def main(**kwargs): raise KGTKException("No input file path given!") if output_uri == "": - output_uri = os.getenv("HOME") # os.getcwd() + output_uri = os.getenv("HOME") # os.getcwd() if black_list_files != "": black_list_set = load_black_list_files(black_list_files) else: @@ -788,19 +753,19 @@ def main(**kwargs): _logger.info("Totally {} property labels loaded.".format(len(property_labels_dict))) else: property_labels_dict = {} - + run_TSNE = kwargs.get("run_TSNE", True) for each_model_name in all_models_names: for each_input_file in input_uris: _logger.info("Running {} model on {}".format(each_model_name, each_input_file)) process = EmbeddingVector(each_model_name, query_server=query_server, cache_config=cache_config) - process.read_input(file_path=each_input_file, skip_nodes_set=black_list_set, + process.read_input(file_path=each_input_file, skip_nodes_set=black_list_set, input_format=input_format, target_properties=properties, property_labels_dict=property_labels_dict) - process.get_vetors(use_cache=True) - process.plot_result(use_cache=True, output_properties=output_properties, - input_format=input_format, output_uri=output_uri, + process.get_vetors() + process.plot_result(output_properties=output_properties, + input_format=input_format, output_uri=output_uri, run_TSNE=run_TSNE, output_format=output_format) process.evaluate_result() _logger.info("*" * 20 + "finished" + "*" * 20) @@ -808,88 +773,95 @@ def main(**kwargs): _logger.debug(e, exc_info=True) raise KGTKException(str(e)) + def parser(): return { 'help': """Produce embedding vectors on given file's nodes.""" } + def add_arguments(parser): import argparse def str2bool(v): if isinstance(v, bool): - return v + return v if v.lower() in ('yes', 'true', 't', 'y', '1'): return True elif v.lower() in ('no', 'false', 'f', 'n', '0'): return False else: raise argparse.ArgumentTypeError('Boolean value expected.') - # logging level, no longer need as there is a global choice for it - parser.add_argument('-l', '--logging-level', action='store', dest='logging_level', - default="info", choices=("error", "warning", "info", "debug", "none"), - help="set up the logging level, default is INFO level") - # parser.add_argument('--debug', action='store_true', dest='logging_level', - # help='set up to make logging and store at home directory.') + + parser.accept_shared_argument('_debug') + # logging level, no longer need as there is a global --debug choice for it + # parser.add_argument('-l', '--logging-level', action='store', dest='logging_level', + # default="info", choices=("error", "warning", "info", "debug", "none"), + # help="set up the logging level, default is INFO level") + # parser.add_argument('--debug', dest='_debug', action='store_true', default=False, help='enable debug mode') + # model name all_models_names = ALL_EMBEDDING_MODELS_NAMES parser.add_argument('-m', '--model', action='store', nargs='+', dest='all_models_names', - default="bert-base-wikipedia-sections-mean-tokens", choices=all_models_names, - help="the model to used for embedding") + default="bert-base-wikipedia-sections-mean-tokens", choices=all_models_names, + help="the model to used for embedding") # input file parser.add_argument('-i', '--input', action='store', nargs='+', dest='input_uris', - help="input path",) - parser.add_argument('-f', '--input-format', action='store', dest='input_format', - choices=("test_format", 
"kgtk_format"), default = "kgtk_format", - help="the input file format, could either be `test_format` or `kgtk_format`, default is `kgtk_format`",) - parser.add_argument('-p', '--property-labels-file', action='store', nargs='+', - dest='property_labels_file_uri', help="the path to the property labels file.",) + help="input path", ) + parser.add_argument('-f', '--input-format', action='store', dest='input_format', + choices=("test_format", "kgtk_format"), default="kgtk_format", + help="the input file format, could either be `test_format` or `kgtk_format`, default is `kgtk_format`", ) + parser.add_argument('-p', '--property-labels-file', action='store', nargs='+', + dest='property_labels_file_uri', help="the path to the property labels file.", ) # properties (only valid for kgtk format input/output data) - parser.add_argument('--label-properties', action='store', nargs='+', - dest='label_properties',default= ["label"], - help="""The names of the eges for label properties, Default is ["label"]. \n This argument is only valid for input in kgtk format.""") - parser.add_argument('--description-properties', action='store', nargs='+', - dest='description_properties', default= ["description"], - help="""The names of the eges for description properties, Default is ["description"].\n This argument is only valid for input in kgtk format.""") - parser.add_argument('--isa-properties', action='store', nargs='+', - dest='isa_properties', default= ["P31"], - help="""The names of the eges for `isa` properties, Default is ["P31"] (the `instance of` node in wikidata).\n This argument is only valid for input in kgtk format.""") - parser.add_argument('--has-properties', action='store', nargs='+', - dest='has_properties', default= ["all"], - help="""The names of the eges for `has` properties, Default is ["all"] (will automatically append all properties found for each node).\n This argument is only valid for input in kgtk format.""") - parser.add_argument('--output-property', action='store', - dest='output_properties', default= "text_embedding", - help="""The output property name used to record the embedding. Default is `output_properties`. \nThis argument is only valid for output in kgtk format.""") + parser.add_argument('--label-properties', action='store', nargs='+', + dest='label_properties', default=["label"], + help="""The names of the eges for label properties, Default is ["label"]. \n + This argument is only valid for input in kgtk format.""") + parser.add_argument('--description-properties', action='store', nargs='+', + dest='description_properties', default=["description"], + help="""The names of the eges for description properties, Default is ["description"].\n + This argument is only valid for input in kgtk format.""") + parser.add_argument('--isa-properties', action='store', nargs='+', + dest='isa_properties', default=["P31"], + help="""The names of the eges for `isa` properties, Default is ["P31"] (the `instance of` node in wikidata).\n + This argument is only valid for input in kgtk format.""") + parser.add_argument('--has-properties', action='store', nargs='+', + dest='has_properties', default=["all"], + help="""The names of the eges for `has` properties, Default is ["all"] (will automatically append all properties found for each node).\n This argument is only valid for input in kgtk format.""") + parser.add_argument('--output-property', action='store', + dest='output_properties', default="text_embedding", + help="""The output property name used to record the embedding. 
Default is `text_embedding`. \nThis argument is only valid for output in kgtk format.""") # output parser.add_argument('-o', '--embedding-projector-metadata-path', action='store', dest='output_uri', default="", - help="output path for the metadata file, default will be current user's home directory") - parser.add_argument('--output-format', action='store', dest='output_format', - default="kgtk", choices=("tsv_format", "kgtk_format"), - help="output format, can either be `tsv_format` or `kgtk_format`. \nIf choose `tsv_format`, the output will be a tsv file, with each row contains only the vector representation of a node. Each dimension is separated by a tab") - parser.add_argument('--embedding-projector-metatada', action='store', nargs='+', - dest='metatada_properties', default= [], - help="""list of properties used to construct a metadata file for use in the Google Embedding Projector: http://projector.tensorflow.org. \n Default: the label and description of each node.""") + help="output path for the metadata file, default will be current user's home directory") + parser.add_argument('--output-format', action='store', dest='output_format', + default="kgtk", choices=("tsv_format", "kgtk_format"), + help="output format, can either be `tsv_format` or `kgtk_format`. \nIf `tsv_format` is chosen, the output will be a tsv file where each row contains only the vector representation of a node. Each dimension is separated by a tab") + parser.add_argument('--embedding-projector-metatada', action='store', nargs='+', + dest='metatada_properties', default=[], + help="""list of properties used to construct a metadata file for use in the Google Embedding Projector: http://projector.tensorflow.org. \n Default: the label and description of each node.""") # black list file parser.add_argument('-b', '--black-list', nargs='+', action='store', dest='black_list_files', - default= "", - help="the black list file, contains the Q nodes which should not consider as candidates.") + default="", + help="the black list file, containing the Q nodes which should not be considered as candidates.") # run tsne or not - parser.add_argument("--run-TSNE", type=str2bool, nargs='?', action='store', + parser.add_argument("--run-TSNE", type=str2bool, nargs='?', action='store', default=True, dest="run_TSNE", help="whether to run TSNE or not after the embedding, default is true.") # cache config - parser.add_argument("--use-cache", type=str2bool, nargs='?', action='store', + parser.add_argument("--use-cache", type=str2bool, nargs='?', action='store', default=False, dest="use_cache", help="whether to use cache to get some embedding vectors quicker, default is False") - parser.add_argument("--cache-host", nargs='?', action='store', + parser.add_argument("--cache-host", nargs='?', action='store', default="dsbox01.isi.edu", dest="cache_host", help="cache host address, default is `dsbox01.isi.edu`" ) - parser.add_argument("--cache-port", nargs='?', action='store', + parser.add_argument("--cache-port", nargs='?', action='store', default="6379", dest="cache_port", help="cache server port, default is `6379`" ) # query server - parser.add_argument("--query-server", nargs='?', action='store', + parser.add_argument("--query-server", nargs='?', action='store', default="", dest="query_server", help="sparql query endpoint used for test_format input files, default is https://query.wikidata.org/sparql" ) @@ -897,4 +869,3 @@ def str2bool(v): def run(**kwargs): main(**kwargs) - From 9f41543403ad6bfc6b25d186546aefc4534cdef0 Mon Sep 17 00:00:00 2001 From: 
ckxz105 Date: Thu, 30 Apr 2020 18:08:53 -0700 Subject: [PATCH 040/278] update requirement.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 439daf31e..7d600bae5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,3 +13,4 @@ etk==2.2.1 simplejson pyrallel.lib attrs +redis \ No newline at end of file From 2634adc433dfeaeb811be28866554d3eba296a60 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Thu, 30 Apr 2020 20:22:38 -0700 Subject: [PATCH 041/278] Print with system-specific end-of-line. --- kgtk/exceptions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kgtk/exceptions.py b/kgtk/exceptions.py index d6fea9582..c70e03797 100644 --- a/kgtk/exceptions.py +++ b/kgtk/exceptions.py @@ -45,11 +45,11 @@ def handle_exception(self, type_, exc_val, exc_tb): traceback.print_exception(type_, exc_val, exc_tb) # the output goes to sys.stderr if isinstance(exc_val, KGTKException): - sys.stderr.write(exc_val.message) + print("%s" % exc_val.message, file=sys.stderr) return exc_val.return_code warnings.warn('Please raise KGTKException instead of {}'.format(type_)) - sys.stderr.write(KGTKException.message) + print("%s" % KGTKException.message, file=sys.stderr) return KGTKException.return_code From a81358521a1eb80860bd2fbd13a6b40708748182 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Thu, 30 Apr 2020 20:32:10 -0700 Subject: [PATCH 042/278] Improve date/time parsing. Improve error feedback. --- kgtk/cli/validate.py | 2 +- kgtk/join/kgtkreader.py | 15 ++++++++++---- kgtk/join/kgtkvalue.py | 43 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/kgtk/cli/validate.py b/kgtk/cli/validate.py index 9d97293e9..3593ed784 100644 --- a/kgtk/cli/validate.py +++ b/kgtk/cli/validate.py @@ -213,5 +213,5 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], except SystemExit as e: raise KGTKException("Exit requested") except Exception as e: - raise KGTKException(e) + raise KGTKException(str(e)) diff --git a/kgtk/join/kgtkreader.py b/kgtk/join/kgtkreader.py index d6a30a209..916a46abf 100644 --- a/kgtk/join/kgtkreader.py +++ b/kgtk/join/kgtkreader.py @@ -439,7 +439,10 @@ def _build_column_names(cls, # Read the column names from the first line, stripping end-of-line characters. # # TODO: if the read fails, throw a more useful exception with the line number. - header: str = next(source).rstrip("\r\n") + try: + header: str = next(source).rstrip("\r\n") + except StopIteration: + raise ValueError("No header line in file") if verbose: print("header: %s" % header, file=error_file, flush=True) @@ -449,7 +452,11 @@ def _build_column_names(cls, # Skip the first record to override the column names in the file. # Do not skip the first record if the file does not have a header record. if skip_first_record: - next(source) + try: + next(source) + except StopIteration: + raise ValueError("No header line to skip") + # Use the forced column names. 
return column_separator.join(force_column_names), force_column_names @@ -479,7 +486,7 @@ def exclude_line(self, action: ValidationAction, msg: str, line: str)->bool: print("In input data line %d, %s: %s" % (self.data_lines_read, msg, line), file=self.error_file, flush=True) self.data_errors_reported += 1 if self.error_limit > 0 and self.data_errors_reported >= self.error_limit: - raise ValueError("Too many data errors.") + raise ValueError("Too many data errors, exiting.") return result # This is both an iterable and an iterator object. @@ -588,7 +595,7 @@ def _ignore_invalid_values(self, values: typing.List[str], line: str)->bool: if len(value) > 0: # Optimize the common case of empty columns. kv: KgtkValue = KgtkValue(value) if not kv.is_valid(): - problems.append("%s: %s" % (self.column_names[idx], kv.describe())) + problems.append("col %d (%s) value '%s' is an %s" % (idx, self.column_names[idx], value, kv.describe())) if len(problems) == 0: return False diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index c9369efd6..f03390101 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -274,7 +274,7 @@ def is_date_and_times(self, idx: typing.Optional[int] = None)->bool: v: str = self.get_item(idx) return v.startswith("^") - date_and_times_re: typing.Pattern = re.compile(r"^\^(?P<year>[0-9]{4})(?P<hyphen>-)?(?P<month>1[0-2]|0[1-9])(?(hyphen)-)(?P<day>3[01]|0[1-9]|[12][0-9])T(?P<hour>2[0-3]|[01][0-9])(?(hyphen):)(?P<minutes>[0-5][0-9])(?(hyphen):)(?P<seconds>[0-5][0-9])(?P<zone>Z|\+[0-9][0-9](?::[0-9][0-9])?)?(?P<precision>/[0-9])?$") + date_and_times_re: typing.Pattern = re.compile(r"^\^(?P<year>[0-9]{4})(?:(?P<hyphen>-)?(?P<month>1[0-2]|0[1-9])(?:(?(hyphen)-)(?P<day>3[01]|0[1-9]|[12][0-9])))T(?P<hour>2[0-3]|[01][0-9])(?:(?(hyphen):)(?P<minutes>[0-5][0-9])(?:(?(hyphen):)(?P<seconds>[0-5][0-9])))(?P<zone>Z|[-+][0-9][0-9](?::[0-9][0-9])?)?(?P<precision>/[0-1]?[0-9])?$") def is_valid_date_and_times(self, idx: typing.Optional[int] = None)->bool: """ @@ -282,6 +282,47 @@ def is_valid_date_and_times(self, idx: typing.Optional[int] = None)->bool: Otherwise, return True if the value looks like valid date and times literal based on ISO-8601. + Valid date formats: + YYYY + YYYY-MM + YYYYMMDD + YYYY-MM-DD + + Valid date and time formats: + YYYYMMDDTHH + YYYY-MM-DDTHH + YYYYMMDDTHHMM + YYYY-MM-DDTHH:MM + YYYYMMDDTHHMMSS + YYYY-MM-DDTHH:MM:SS + + Optional Time Zone suffix for date and time: + Z + +HH + -HH + +HHMM + -HHMM + +HH:MM + -HH:MM + + NOTE: This code also accepts the following, which are disallowed by the standard: + YYYYT... + YYYYMM + YYYYMMT... + YYYY-MMT... + + Note: ISO-8601 disallows 0 for month or day, e.g.: + Invalid Correct + 1960-00-00T00:00:00Z/9 1960-01-01T00:00:00Z/9 + + TODO: Support fractional time elements + + TODO: Support week dates. + + TODO: Support ordinal dates + + TODO: Support Unicode minus sign as well as ASCII minus sign. + TODO: validate the calendar date, e.g. fail if 31-Apr-2020. 
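A calendar check for that last TODO could be layered on top of the regex match; the sketch below uses only the standard library, and the helper name is hypothetical rather than taken from this module:

import datetime

def is_real_calendar_date(year: int, month: int, day: int) -> bool:
    # datetime.date raises ValueError for impossible dates such as 31-Apr-2020,
    # which the pattern above cannot reject on its own.
    try:
        datetime.date(year, month, day)
        return True
    except ValueError:
        return False

assert is_real_calendar_date(2020, 4, 30)
assert not is_real_calendar_date(2020, 4, 31)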
""" if self.is_list() and idx is None: From cdeb056faa6a559c59700bc32529c70ca8ec701e Mon Sep 17 00:00:00 2001 From: filievski Date: Fri, 1 May 2020 09:54:42 -0700 Subject: [PATCH 043/278] gt_loader is way cleaner now --- kgtk/cli/gt_loader.py | 39 ++++++++++++++++++++++++++++----------- kgtk/gt/analysis_utils.py | 4 ++-- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/kgtk/cli/gt_loader.py b/kgtk/cli/gt_loader.py index e1121cd5e..650a1c06e 100644 --- a/kgtk/cli/gt_loader.py +++ b/kgtk/cli/gt_loader.py @@ -16,10 +16,6 @@ def add_arguments(parser): parser (argparse.ArgumentParser) """ parser.add_argument(action="store", type=str, dest="filename", metavar='filename', help='filename here') - parser.add_argument("--header", action="store_true", dest="header_bool", help="Does the file contain a header in its first row") - parser.add_argument("--subj", action="store", type=int, dest="sub", help='Column in which the subject is given, default 0', default=0) - parser.add_argument("--obj", action="store", type=int, dest="obj", help='Column in which the subject is given, default 2', default=2) - parser.add_argument('--pred', action='store', type=str, dest="props", help="Edge properties to store in their order of appearance - comma-separated string.") parser.add_argument('--directed', action='store_true', dest="directed", help="Is the graph directed or not?") parser.add_argument('--degrees', action='store_true', dest='compute_degrees', help="Whether or not to compute degree distribution.") parser.add_argument('--pagerank', action='store_true', dest='compute_pagerank', help="Whether or not to compute PageRank centraility.") @@ -27,7 +23,19 @@ def add_arguments(parser): parser.add_argument('--log', action='store', type=str, dest='log_file', help='Log file for summarized statistics of the graph.', default="./log.txt") parser.add_argument('-o', '--out', action='store', type=str, dest='output', help='Graph tool file to dump the graph too - if empty, it will not be saved.') -def run(filename, header_bool, sub, obj, props, directed, compute_degrees, compute_pagerank, compute_hits, log_file, output): +def run(filename, directed, compute_degrees, compute_pagerank, compute_hits, log_file, output): + + def infer_index(h, options=[]): + for o in options: + if o in h: + return h.index(o) + return -1 + + def infer_predicate(h, options=[]): + for o in options: + if o in h: + return o + return '' try: # import modules locally @@ -42,22 +50,31 @@ def run(filename, header_bool, sub, obj, props, directed, compute_degrees, compu directions=['in', 'out', 'total'] id_col='name' - p=props.split(',') - predicate=p[0] + with open(filename, 'r') as f: + header=next(f).split('\t') + subj_index=infer_index(header, options=['node1', 'subject']) + obj_index=infer_index(header, options=['node2', 'object', 'value']) + predicate=infer_predicate(header, options=['property', 'predicate', 'label']) + + p=[] + for i, header_col in enumerate(header): + if i in [subj_index, obj_index]: continue + p.append(header_col) + with open(log_file, 'w') as writer: writer.write('loading the TSV graph now ...\n') G2 = load_graph_from_csv(filename, - skip_first=header_bool, + skip_first=True, directed=directed, hashed=True, - ecols=[sub,obj], - eprop_names=props.split(','), + ecols=[subj_index,obj_index], + eprop_names=p, csv_options={'delimiter': '\t'}) writer.write('graph loaded! 
It has %d nodes and %d edges\n' % (G2.num_vertices(), G2.num_edges())) writer.write('\n###Top relations:\n') - for rel, freq in gtanalysis.get_topN_relations(G2): + for rel, freq in gtanalysis.get_topN_relations(G2, pred_property=predicate): writer.write('%s\t%d\n' % (rel, freq)) if compute_degrees: diff --git a/kgtk/gt/analysis_utils.py b/kgtk/gt/analysis_utils.py index f4f9e89c7..d2f3da9c1 100644 --- a/kgtk/gt/analysis_utils.py +++ b/kgtk/gt/analysis_utils.py @@ -81,9 +81,9 @@ def compute_stats(g, direction): 'stdev_degree': stdev_degree } -def get_topN_relations(g, N=10): +def get_topN_relations(g, N=10, pred_property='predicate'): rel_freq=defaultdict(int) for i, e in enumerate(g.edges()): - r=g.edge_properties['predicate'][e] + r=g.edge_properties[pred_property][e] rel_freq[r]+=1 return sorted(rel_freq.items(), key=lambda x: x[1], reverse=True)[:N] From c1bb5f757a5c95021b7039b50c0422b8c8728104 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Fri, 1 May 2020 14:51:20 -0700 Subject: [PATCH 044/278] Support two or three character language codes. Relax the constraints on location coordinates. --- kgtk/join/kgtkvalue.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index f03390101..b0b83b040 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -188,7 +188,8 @@ def is_language_qualified_string(self, idx: typing.Optional[int] = None)->bool: v: str = self.get_item(idx) return v.startswith("'") - language_qualified_string_re: typing.Pattern = re.compile(r"^(?P<text>'(?:[^']|\\.)*')@(?P<lang>[a-zA-Z][a-zA-Z])$") + # Support two or three character language codes. + language_qualified_string_re: typing.Pattern = re.compile(r"^(?P<text>'(?:[^']|\\.)*')@(?P<lang>[a-zA-Z]{2,3})$") def is_valid_language_qualified_string(self, idx: typing.Optional[int] = None)->bool: """ @@ -207,7 +208,12 @@ def is_valid_language_qualified_string(self, idx: typing.Optional[int] = None)-> lang: str = m.group("lang") # print("lang: %s" % lang) try: - languages.get(alpha2=lang.lower()) + if len(lang) == 2: + # Two-character language codes. + languages.get(alpha2=lang.lower()) + else: + # Three-character language codes. 
+ languages.get(bibliographic=lang.lower()) return True except KeyError: return False @@ -223,7 +229,8 @@ def is_location_coordinates(self, idx: typing.Optional[int] = None)->bool: v: str = self.get_item(idx) return v.startswith("@") - location_coordinates_re: typing.Pattern = re.compile(r"^@(?P<lat>[-+]?\d{3}\.\d{5})/(?P<lon>[-+]?\d{3}\.\d{5})$") + #location_coordinates_re: typing.Pattern = re.compile(r"^@(?P<lat>[-+]?\d{3}\.\d{5})/(?P<lon>[-+]?\d{3}\.\d{5})$") + location_coordinates_re: typing.Pattern = re.compile(r"^@(?P<lat>[-+]?(?:(?:\d+(?:\.\d*)?)|(?:\.\d+)))/(?P<lon>[-+]?(?:(?:\d+(?:\.\d*)?)|(?:\.\d+)))$") def is_valid_location_coordinates(self, idx: typing.Optional[int] = None)->bool: """ From 624ec231331aec3c224d27e25fee94b28b09c489 Mon Sep 17 00:00:00 2001 From: ckxz105 Date: Fri, 1 May 2020 16:29:38 -0700 Subject: [PATCH 045/278] update sentence generating algorithm --- kgtk/cli/text_embedding.py | 203 ++++++++++++++++++++++--------------- 1 file changed, 123 insertions(+), 80 deletions(-) diff --git a/kgtk/cli/text_embedding.py b/kgtk/cli/text_embedding.py index 77004a2d9..35f8e8c35 100644 --- a/kgtk/cli/text_embedding.py +++ b/kgtk/cli/text_embedding.py @@ -65,11 +65,12 @@ def __init__(self, model_name=None, query_server=None, cache_config: dict = {}): self.qnodes_descriptions = dict() self.vectors_map = dict() self.property_labels_dict = dict() + self.q_node_to_label = dict() + self.node_labels = dict() self.vectors_2D = None + self.vector_dump_file = None self.gt_nodes = set() self.candidates = defaultdict(dict) - self.vector_dump_file = None - self.q_node_to_label = dict() self.metadata = [] self.gt_indexes = set() self.input_format = "" @@ -116,6 +117,99 @@ def send_sparql_query(self, query_body: str): except: raise KGTKException("Sending Sparql query to {} failed!".format(self.wikidata_server)) + def _get_labels(self, nodes: typing.List[str]): + query_nodes = " ".join(["wd:{}".format(each) for each in nodes]) + query = """ + select ?item ?nodeLabel + where { + values ?item {""" + query_nodes + """} + ?item rdfs:label ?nodeLabel. + FILTER(LANG(?nodeLabel) = "en"). + } + """ + results2 = self.send_sparql_query(query) + for each_res in results2: + node_id = each_res['item']['value'].split("/")[-1] + value = each_res['nodeLabel']['value'] + self.node_labels[node_id] = value + + def _get_labels_and_descriptions(self, query_qnodes: str, need_find_label: bool, need_find_description: bool): + query_body = """ + select ?item ?itemDescription ?itemLabel + where { + values ?item {""" + query_qnodes + """ } + SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". 
} + } + """ + results = self.send_sparql_query(query_body) + for each in results: + each_node = each['item']['value'].split("/")[-1] + if 'itemDescription' in each: + description = each['itemDescription']['value'] + else: + description = "" + if "itemLabel" in each: + label = each['itemLabel']['value'] + else: + label = "" + if need_find_label: + self.candidates[each_node]["label_properties"] = [label] + if need_find_description: + self.candidates[each_node]["description_properties"] = [description] + + def _get_property_values(self, query_qnodes, query_part_names, query_part_properties): + used_p_node_ids = set() + for part_name, part in zip(query_part_names, query_part_properties): + if part_name == "isa_properties": + self._get_labels(part) + for i, each in enumerate(part): + if each not in {"label", "description", "all"}: + query_body2 = """ + select ?item ?eachPropertyLabel + where {{ + values ?item {{{all_nodes}}} + ?item wdt:{qnode} ?eachProperty. + SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }} + }} + """.format(all_nodes=query_qnodes, qnode=each) + results2 = self.send_sparql_query(query_body2) + + for each_res in results2: + node_id = each_res['item']['value'].split("/")[-1] + value = each_res['eachPropertyLabel']['value'] + if part_name == "isa_properties" and self.node_labels[each].endswith("of"): + value = self.node_labels[each] + "||" + value + used_p_node_ids.add(node_id) + if part_name in self.candidates[node_id]: + self.candidates[node_id][part_name] = value + else: + self.candidates[node_id][part_name] = {value} + return used_p_node_ids + + def _get_all_properties(self, query_qnodes, used_p_node_ids, properties_list): + has_properties_set = set(properties_list[3]) + query_body3 = """ + select DISTINCT ?item ?p_entity ?p_entityLabel + where { + values ?item {""" + query_qnodes + """} + ?item ?p ?o. + FILTER regex(str(?p), "^http://www.wikidata.org/prop/P", "i") + BIND (IRI(REPLACE(STR(?p), "http://www.wikidata.org/prop", "http://www.wikidata.org/entity")) AS ?p_entity) . + SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". 
} + } + """ + results3 = self.send_sparql_query(query_body3) + for each in results3: + node_name = each['item']['value'].split("/")[-1] + p_node_id = each['p_entity']['value'].split("/")[-1] + p_node_label = each['p_entityLabel']['value'] + if p_node_id not in used_p_node_ids: + if properties_list[3] == ["all"] or p_node_id in has_properties_set: + if "has_properties" in self.candidates[node_name]: + self.candidates[node_name]["has_properties"].add(p_node_label) + else: + self.candidates[node_name]["has_properties"] = {p_node_label} + def get_item_description(self, qnodes: typing.List[str] = None, target_properties: dict = {}): """ use sparql query to get the descriptions of given Q nodes @@ -127,7 +221,6 @@ def get_item_description(self, qnodes: typing.List[str] = None, target_propertie else: find_all_properties = False properties_list = [[] for _ in range(4)] - used_p_node_ids = set() names = ["labels", "descriptions", "isa_properties", "has_properties"] for k, v in target_properties.items(): if v == "label_properties": @@ -159,80 +252,23 @@ def get_item_description(self, qnodes: typing.List[str] = None, target_propertie # this is used to get corresponding labels / descriptions if need_find_label or need_find_description: - query_body = """ - select ?item ?itemDescription ?itemLabel - where { - values ?item {""" + query_qnodes + """ } - SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } - } - """ - results = self.send_sparql_query(query_body) - for each in results: - each_node = each['item']['value'].split("/")[-1] - if 'itemDescription' in each: - description = each['itemDescription']['value'] - else: - description = "" - if "itemLabel" in each: - label = each['itemLabel']['value'] - else: - label = "" - if need_find_label: - self.candidates[each_node]["label_properties"] = [label] - if need_find_description: - self.candidates[each_node]["description_properties"] = [description] - - # this is used to get corresponding P node labels - query_body2 = "select ?item" - part2 = "" - for name, part in zip(names, properties_list): - for i, each in enumerate(part): - if each not in {"label", "description", "all"}: - used_p_node_ids.add(each) - query_body2 += " ?{}_{}Label".format(name, i) - part2 += """?item wdt:{} ?{}_{}. \n""".format(each, name, i) - query_body2 += """ - where { - values ?item {""" + query_qnodes + "}" - - query_body2 += part2 + """ - SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". 
} - } - """ - results2 = self.send_sparql_query(query_body2) - for each in results2: - node_name = each['item']['value'].split("/")[-1] - for name, part in zip(names, properties_list): - if len(part) > 0: - properties_res = set() - for i in range(len(part)): - property_key = '{}_{}Label'.format(name, i) - if property_key in each: - properties_res.add(each[property_key]['value']) - self.candidates[node_name][name] = properties_res + self._get_labels_and_descriptions(query_qnodes, need_find_label, need_find_description) + + if len(properties_list[3]) > len(qnodes): + # in this condition, we have too many properties need to be queried, it will waste time + # query to get all properties then filtering would save more times + find_all_properties = True + query_part2_names = names[:3] + query_part2_properties = properties_list[:3] + else: + query_part2_names = names + query_part2_properties = properties_list + # this is used to get corresponding labels of properties values + used_p_node_ids = self._get_property_values(query_qnodes, query_part2_names, query_part2_properties) # if need get all properties, we need to run extra query if find_all_properties: - query_body3 = """ - select DISTINCT ?item ?p_entity ?p_entityLabel - where { - values ?item {""" + query_qnodes + """} - ?item ?p ?o. - FILTER regex(str(?p), "^http://www.wikidata.org/prop/P", "i") - BIND (IRI(REPLACE(STR(?p), "http://www.wikidata.org/prop", "http://www.wikidata.org/entity")) AS ?p_entity) . - SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } - } - """ - results3 = self.send_sparql_query(query_body3) - for each in results3: - node_name = each['item']['value'].split("/")[-1] - p_node_id = each['p_entity']['value'].split("/")[-1] - p_node_label = each['p_entityLabel']['value'] - if p_node_id not in used_p_node_ids: - if "has_properties" in self.candidates[node_name]: - self.candidates[node_name]["has_properties"].add(p_node_label) - else: - self.candidates[node_name]["has_properties"] = {p_node_label} + self._get_all_properties(query_qnodes, used_p_node_ids, properties_list) for each_node_id in qnodes: each_sentence = self.attribute_to_sentence(self.candidates[each_node_id], each_node_id) @@ -260,7 +296,6 @@ def read_input(self, file_path: str, skip_nodes_set: set = None, if input_format == "test_format": self.input_format = input_format input_df = pd.read_csv(file_path) - candidates = {} gt = {} count = 0 if "GT_kg_id" in input_df.columns: @@ -297,7 +332,7 @@ def read_input(self, file_path: str, skip_nodes_set: set = None, temp = set(temp) - to_remove_q count += len(temp) self.gt_nodes.add(each[gt_column_id]) - self.get_item_description(temp, target_properties, label) + self.get_item_description(temp, target_properties) self._logger.info("Totally {} rows with {} candidates loaded.".format(str(len(gt)), str(count))) @@ -396,12 +431,20 @@ def attribute_to_sentence(self, v, node_id=None): concated_sentence += self.get_real_label_name(v["description_properties"][0]) if "isa_properties" in v and len(v["isa_properties"]) > 0: have_isa_properties = True - temp = [self.get_real_label_name(each) for each in v["isa_properties"]] - if concated_sentence != "" and temp[0] != "": + temp = "" + for each in v["isa_properties"]: + each = self.get_real_label_name(each) + if "||" in each: + if "instance of" in each: + each = each.split("||")[1] + else: + each = each.replace("||", " ") + temp += each + ", " + if concated_sentence != "" and temp != "": concated_sentence += " is a " - elif temp[0] != "": + elif 
concated_sentence == "": concated_sentence += "It is a " - concated_sentence += ", ".join(temp) + concated_sentence += temp[:-2] if "has_properties" in v and len(v["has_properties"]) > 0: temp = [self.get_real_label_name(each) for each in v["has_properties"]] if concated_sentence != "" and temp[0] != "": From 37572a41202ef022d218cb9e86c915601290a37e Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Fri, 1 May 2020 17:34:34 -0700 Subject: [PATCH 046/278] Validate numbers and quantities. --- kgtk/join/kgtkvalue.py | 188 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 179 insertions(+), 9 deletions(-) diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index b0b83b040..cdf235316 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -62,10 +62,10 @@ def is_empty(self, idx: typing.Optional[int] = None)->bool: v: str = self.get_item(idx) return len(v) == 0 - def is_number(self, idx: typing.Optional[int] = None)->bool: + def is_number_old(self, idx: typing.Optional[int] = None)->bool: """ Return False if this value is a list and idx is None. - Otherwise, return True if the first character is 0-9,_,-,. . + Otherwise, return True if the first character is 0-9,+,-,. . """ if self.is_list() and idx is None: return False @@ -73,7 +73,7 @@ def is_number(self, idx: typing.Optional[int] = None)->bool: v: str = self.get_item(idx) return v.startswith(("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "+", "-", ".")) - def is_valid_number(self, idx: typing.Optional[int] = None)->bool: + def is_valid_number_old(self, idx: typing.Optional[int] = None)->bool: """ Return False if this value is a list and idx is None. Otherwise, return True if the first character is 0-9,_,-,. @@ -110,6 +110,174 @@ def is_valid_number(self, idx: typing.Optional[int] = None)->bool: return False + def is_number_or_quantity(self, idx: typing.Optional[int] = None)->bool: + """ + Return False if this value is a list and idx is None. + Otherwise, return True if the first character is 0-9,+,-,. . + """ + if self.is_list() and idx is None: + return False + + v: str = self.get_item(idx) + return v.startswith(("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "+", "-", ".")) + + # The following lexical analysis is based on: + # https://docs.python.org/3/reference/lexical_analysis.html + + # The long integer suffix was part of Python 2. It was dropped in Python 3. + long_suffix_pat: str = r'[lL]' + + plus_or_minus_pat: str = r'[-+]' + + # Integer literals. + # + # Decimal integers, allowing leading zeros. + digit_pat: str = r'[0-9]' + decinteger_pat: str = r'(?:{digit}(?:_?{digit})*{long_suffix}?)'.format(digit=digit_pat, + long_suffix=long_suffix_pat) + bindigit_pat: str = r'[01]' + bininteger_pat: str = r'(?:0[bB](":_?{bindigit})+{long_suffix})'.format(bindigit=bindigit_pat, + long_suffix=long_suffix_pat) + octdigit_pat: str = r'[0-7]' + octinteger_pat: str = r'(?:0[oO](":_?{octdigit})+{long_suffix})'.format(octdigit=octdigit_pat, + long_suffix=long_suffix_pat) + hexdigit_pat: str = r'[0-7a-fA-F]' + hexinteger_pat: str = r'(?:0[xX](":_?{hexdigit})+{long_suffix})'.format(hexdigit=hexdigit_pat, + long_suffix=long_suffix_pat) + + integer_pat: str = r'(?:{decinteger}|{bininteger}|{octinteger}|{hexinteger})'.format(decinteger=decinteger_pat, + bininteger=bininteger_pat, + octinteger=octinteger_pat, + hexinteger=hexinteger_pat) + + # Floating point literals. 
+ digitpart_pat: str = r'(?:{digit}(?:_?{digit})*)'.format(digit=digit_pat) + fraction_pat: str = r'(?:\.{digitpart})'.format(digitpart=digitpart_pat) + pointfloat_pat: str = r'(?:{digitpart}?{fraction})|(?:{digitpart}\.)'.format(digitpart=digitpart_pat, + fraction=fraction_pat) + exponent_pat: str = r'(?:[eE]{plus_or_minus}?{digitpart})'.format(plus_or_minus=plus_or_minus_pat, + digitpart=digitpart_pat) + exponentfloat_pat: str = r'(?:{digitpart}|{pointfloat}){exponent}'.format(digitpart=digitpart_pat, + pointfloat=pointfloat_pat, + exponent=exponent_pat) + floatnumber_pat: str = r'(?:{pointfloat}|{exponentfloat})'.format(pointfloat=pointfloat_pat, + exponentfloat=exponentfloat_pat) + + # Imaginary literals. + imagnumber_pat: str = r'(?:{floatnumber}|{digitpart})[jJ]'.format(floatnumber=floatnumber_pat, + digitpart=digitpart_pat) + + # Numeric literals. + numeric_pat: str = r'(?:{plus_or_minus}?(?:{integer}|{floatnumber}|{imagnumber}))'.format(plus_or_minus=plus_or_minus_pat, + integer=integer_pat, + floatnumber=floatnumber_pat, + imagnumber=imagnumber_pat) + + # Tolerances + tolerance_pat: str = r'(?:\[{numeric},{numeric}\])'.format(numeric=numeric_pat) + + # SI units taken from: + # http://www.csun.edu/~vceed002/ref/measurement/units/units.pdf + # + # Note: if Q were in this list, it would conflict with Wikidata nodes (below). + si_unit_pat: str = r'(?:m|kg|s|C|K|mol|cd|F|M|A|N|ohms|V|J|Hz|lx|H|Wb|V\W|Pa)' + si_power_pat: str = r'(?:-1|2|3)' # Might need more. + si_combiner_pat: str = r'[./]' + si_pat: str = r'(?:{si_unit}{si_power}?(?:{si_combiner}{si_unit}{si_power}?)*)'.format(si_unit=si_unit_pat, + si_combiner=si_combiner_pat, + si_power=si_power_pat) + # Wikidata nodes (for units): + nonzero_digit_pat: str = r'[1-9]' + wikidata_node_pat: str = r'(?:Q{nonzero_digit}{digit}*)'.format(nonzero_digit=nonzero_digit_pat, + digit=digit_pat) + + units_pat: str = r'(?:{si}|{wikidata_node})'.format(si=si_pat, + wikidata_node=wikidata_node_pat) + + + # This definition matches numbers or quantities. + number_or_quantity_pat: str = r'{numeric}{tolerance}?{units}?'.format(numeric=numeric_pat, + tolerance=tolerance_pat, + units=units_pat) + # This definition for quantity excludes plain numbers. + quantity_pat: str = r'{numeric}(?:(?:{tolerance}{units}?)|{units})'.format(numeric=numeric_pat, + tolerance=tolerance_pat, + units=units_pat) + # This matches numbers or quantities. + number_or_quantity_re: typing.Pattern = re.compile(r'^' + number_or_quantity_pat + r'$') + + # This matches numbers but not quantities. + number_re: typing.Pattern = re.compile(r'^' + numeric_pat + r'$') + + # This matches quantities excluding numbers. + quantity_re: typing.Pattern = re.compile(r'^' + quantity_pat + r'$') + + def is_valid_number_or_quantity(self, idx: typing.Optional[int] = None)->bool: + """ + Return False if this value is a list and idx is None. + Otherwise, return True if the first character is 0-9,_,-,. + and it is either a Python-compatible number or an enhanced + quantity. + """ + if self.is_list() and idx is None: + return False + + v: str = self.get_item(idx) + if not v.startswith(("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "+", "-", ".")): + return False + + m: typing.Optional[typing.Match] = KgtkValue.number_or_quantity_re.match(v) + return m is not None + + + def is_valid_number(self, idx: typing.Optional[int] = None)->bool: + """ + Return False if this value is a list and idx is None. + Otherwise, return True if the first character is 0-9,_,-,. 
+ and it is a Python-compatible number (with optional limited enhancements). + + Examples: + 1 + 123 + -123 + +123 + 0b101 + 0o277 + 0x24F + .4 + 0.4 + 10. + 10.4 + 10.4e10 + """ + if self.is_list() and idx is None: + return False + + v: str = self.get_item(idx) + if not v.startswith(("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "+", "-", ".")): + return False + + m: typing.Optional[typing.Match] = KgtkValue.number_re.match(v) + return m is not None + + + def is_valid_quantity(self, idx: typing.Optional[int] = None)->bool: + """ + Return False if this value is a list and idx is None. + Otherwise, return True if the first character is 0-9,_,-,. + and it is an enhanced quantity. + """ + if self.is_list() and idx is None: + return False + + v: str = self.get_item(idx) + if not v.startswith(("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "+", "-", ".")): + return False + + m: typing.Optional[typing.Match] = KgtkValue.quantity_re.match(v) + return m is not None + + def is_string(self, idx: typing.Optional[int] = None)->bool: """ Return False if this value is a list and idx is None. @@ -163,7 +331,7 @@ def is_symbol(self, idx: typing.Optional[int] = None)->bool: if self.is_list() and idx is None: return False - return not (self.is_number(idx) or self.is_string(idx) or self.is_structured_literal(idx)) + return not (self.is_number_or_quantity(idx) or self.is_string(idx) or self.is_structured_literal(idx)) def is_boolean(self, idx: typing.Optional[int] = None)->bool: """ @@ -361,8 +529,8 @@ def is_valid_literal(self, idx: typing.Optional[int] = None)->bool: if self.is_string(idx): return self.is_valid_string(idx) - elif self.is_number(idx): - return self.is_valid_number(idx) + elif self.is_number_or_quantity(idx): + return self.is_valid_number_or_quantity(idx) elif self.is_structured_literal(idx): if self.is_language_qualified_string(idx): return self.is_valid_language_qualified_string(idx) @@ -373,7 +541,7 @@ def is_valid_literal(self, idx: typing.Optional[int] = None)->bool: elif self.is_extension(idx): return False # no validation presently available. else: - return False # Quantities will reach here at present. + return False # Shouldn't get here. 
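To see how the composed patterns above behave, here is a simplified, self-contained sketch; it reimplements only the decimal part of the grammar (the real patterns also cover binary/octal/hex integers, underscore separators, and a much longer unit list), so treat it as an illustration rather than the module's exact behavior:

import re

# Simplified building blocks: a signed decimal number with optional exponent,
# an optional [low,high] tolerance, and optional units (SI symbol or Q-node).
numeric = r'[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?'
tolerance = r'\[{n},{n}\]'.format(n=numeric)
units = r'(?:m|kg|s|A|K|mol|cd|Q[1-9]\d*)'
# A number is bare; a quantity additionally carries a tolerance and/or units.
number_re = re.compile(r'^{n}$'.format(n=numeric))
quantity_re = re.compile(r'^{n}(?:{t}{u}?|{u})$'.format(n=numeric, t=tolerance, u=units))

for sample in ['10.4e10', '123kg', '12[11.5,12.5]m', '5Q12345']:
    print(sample, bool(number_re.match(sample)), bool(quantity_re.match(sample)))
# '10.4e10' matches number_re but not quantity_re; the other three are quantities.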
else: return False @@ -422,11 +590,13 @@ def describe(self, idx: typing.Optional[int] = None)->str: return "String" else: return "Invalid String" - elif self.is_number(idx): + elif self.is_number_or_quantity(idx): if self.is_valid_number(idx): return "Number" + elif self.is_valid_quantity(idx): + return "Quantity" else: - return "Invalid Number" + return "Invalid Number or Quantity" elif self.is_structured_literal(idx): if self.is_language_qualified_string(idx): if self.is_valid_language_qualified_string(idx): From c0762dc609af5fd0021b50a3af3ad79471a634ae Mon Sep 17 00:00:00 2001 From: saggu Date: Fri, 1 May 2020 17:43:17 -0700 Subject: [PATCH 047/278] add option to output stats only --- kgtk/cli/gt_loader.py | 224 ++++++++++++++++++++++-------------------- 1 file changed, 117 insertions(+), 107 deletions(-) diff --git a/kgtk/cli/gt_loader.py b/kgtk/cli/gt_loader.py index 650a1c06e..bfc5da2b9 100644 --- a/kgtk/cli/gt_loader.py +++ b/kgtk/cli/gt_loader.py @@ -17,110 +17,120 @@ def add_arguments(parser): """ parser.add_argument(action="store", type=str, dest="filename", metavar='filename', help='filename here') parser.add_argument('--directed', action='store_true', dest="directed", help="Is the graph directed or not?") - parser.add_argument('--degrees', action='store_true', dest='compute_degrees', help="Whether or not to compute degree distribution.") - parser.add_argument('--pagerank', action='store_true', dest='compute_pagerank', help="Whether or not to compute PageRank centraility.") - parser.add_argument('--hits', action='store_true', dest='compute_hits', help="Whether or not to compute HITS centraility.") - parser.add_argument('--log', action='store', type=str, dest='log_file', help='Log file for summarized statistics of the graph.', default="./log.txt") - parser.add_argument('-o', '--out', action='store', type=str, dest='output', help='Graph tool file to dump the graph too - if empty, it will not be saved.') + parser.add_argument('--degrees', action='store_true', dest='compute_degrees', + help="Whether or not to compute degree distribution.") + parser.add_argument('--pagerank', action='store_true', dest='compute_pagerank', + help="Whether or not to compute PageRank centrality.") + parser.add_argument('--hits', action='store_true', dest='compute_hits', + help="Whether or not to compute HITS centrality.") + parser.add_argument('--log', action='store', type=str, dest='log_file', + help='Log file for summarized statistics of the graph.', default="./log.txt") + parser.add_argument('-o', '--out', action='store', type=str, dest='output', + help='Graph tool file to dump the graph to - if empty, it will not be saved.') + parser.add_argument('--output-stats', action='store_true', dest='output_stats', + help='do not output the graph but statistics only') + + + def run(filename, directed, compute_degrees, compute_pagerank, compute_hits, log_file, output, output_stats): from kgtk.exceptions import KGTKException def infer_index(h, options=[]): for o in options: if o in h: return h.index(o) return -1 def infer_predicate(h, options=[]): for o in options: if o in h: return o return '' try: # import modules locally import socket from graph_tool import load_graph_from_csv from graph_tool import centrality import kgtk.gt.analysis_utils as gtanalysis import sys # hardcoded values useful for the script. 
It has %d nodes and %d edges\n' % (G2.num_vertices(), G2.num_edges())) - writer.write('\n###Top relations:\n') - for rel, freq in gtanalysis.get_topN_relations(G2, pred_property=predicate): - writer.write('%s\t%d\n' % (rel, freq)) - - if compute_degrees: - writer.write('\n###Degrees:\n') - for direction in directions: - degree_data=gtanalysis.compute_node_degree_hist(G2, direction) - max_degree=len(degree_data)-1 - mean_degree, std_degree= gtanalysis.compute_avg_node_degree(G2, direction) - writer.write('%s degree stats: mean=%f, std=%f, max=%d\n' % (direction, mean_degree, std_degree, max_degree)) - - if compute_pagerank: - writer.write('\n###PageRank\n') - v_pr = G2.new_vertex_property('float') - centrality.pagerank(G2, prop=v_pr) - G2.properties[('v', 'vertex_pagerank')] = v_pr - writer.write('Max pageranks\n') - result=gtanalysis.get_topn_indices(G2, 'vertex_pagerank', 5, id_col) - for n_id, n_label, pr in result: - writer.write('%s\t%s\t%f\n' % (n_id, n_label, pr)) - - if compute_hits: - writer.write('\n###HITS\n') - hits_eig, G2.vp['vertex_hubs'], G2.vp['vertex_auth']=gtanalysis.compute_hits(G2) - writer.write('HITS hubs\n') - main_hubs=gtanalysis.get_topn_indices(G2, 'vertex_hubs', 5, id_col) - for n_id, n_label, hubness in main_hubs: - writer.write('%s\t%s\t%f\n' % (n_id, n_label, hubness)) - writer.write('HITS auth\n') - main_auth=gtanalysis.get_topn_indices(G2, 'vertex_auth', 5, id_col) - for n_id, n_label, authority in main_auth: - writer.write('%s\t%s\t%f\n' % (n_id, n_label, authority)) - - for e in G2.edges(): - sid, oid=e - lbl=G2.ep[predicate][e] - sys.stdout.write('%s\t%s\t%s\n' % (G2.vp[id_col][sid], lbl, G2.vp[id_col][oid])) - - for v in G2.vertices(): - v_id=G2.vp[id_col][v] - for vprop in G2.vertex_properties.keys(): - if vprop==id_col: continue - sys.stdout.write('%s\t%s\t%s\n' % (v_id, vprop, G2.vp[vprop][v])) - - if output: - writer.write('now saving the graph to %s\n' % output) - G2.save(output) - except Exception as e: - raise KGTKException('Error: ' + str(e)) - + parser.add_argument('--degrees', action='store_true', dest='compute_degrees', + help="Whether or not to compute degree distribution.") + parser.add_argument('--pagerank', action='store_true', dest='compute_pagerank', + help="Whether or not to compute PageRank centraility.") + parser.add_argument('--hits', action='store_true', dest='compute_hits', + help="Whether or not to compute HITS centraility.") + parser.add_argument('--log', action='store', type=str, dest='log_file', + help='Log file for summarized statistics of the graph.', default="./log.txt") + parser.add_argument('-o', '--out', action='store', type=str, dest='output', + help='Graph tool file to dump the graph too - if empty, it will not be saved.') + parser.add_argument('--output-stats', action='store_true', dest='output_stats', + help='do not output the graph but statistics only') + + + +def run(filename, directed, compute_degrees, compute_pagerank, compute_hits, log_file, output, output_stats): + from kgtk.exceptions import KGTKException + def infer_index(h, options=[]): + for o in options: + if o in h: + return h.index(o) + return -1 + + def infer_predicate(h, options=[]): + for o in options: + if o in h: + return o + return '' + + try: + # import modules locally + import socket + from graph_tool import load_graph_from_csv + from graph_tool import centrality + import kgtk.gt.analysis_utils as gtanalysis + import sys + + # hardcoded values useful for the script. 
Perhaps some of them should be exposed as arguments later + directions = ['in', 'out', 'total'] + id_col = 'name' + + with open(filename, 'r') as f: + header = next(f).split('\t') + subj_index = infer_index(header, options=['node1', 'subject']) + obj_index = infer_index(header, options=['node2', 'object', 'value']) + predicate = infer_predicate(header, options=['property', 'predicate', 'label']) + + p = [] + for i, header_col in enumerate(header): + if i in [subj_index, obj_index]: continue + p.append(header_col) + + with open(log_file, 'w') as writer: + + writer.write('loading the TSV graph now ...\n') + G2 = load_graph_from_csv(filename, + skip_first=True, + directed=directed, + hashed=True, + ecols=[subj_index, obj_index], + eprop_names=p, + csv_options={'delimiter': '\t'}) + + writer.write('graph loaded! It has %d nodes and %d edges\n' % (G2.num_vertices(), G2.num_edges())) + writer.write('\n###Top relations:\n') + for rel, freq in gtanalysis.get_topN_relations(G2, pred_property=predicate): + writer.write('%s\t%d\n' % (rel, freq)) + + if compute_degrees: + writer.write('\n###Degrees:\n') + for direction in directions: + degree_data = gtanalysis.compute_node_degree_hist(G2, direction) + max_degree = len(degree_data) - 1 + mean_degree, std_degree = gtanalysis.compute_avg_node_degree(G2, direction) + writer.write( + '%s degree stats: mean=%f, std=%f, max=%d\n' % (direction, mean_degree, std_degree, max_degree)) + + if compute_pagerank: + writer.write('\n###PageRank\n') + v_pr = G2.new_vertex_property('float') + centrality.pagerank(G2, prop=v_pr) + G2.properties[('v', 'vertex_pagerank')] = v_pr + writer.write('Max pageranks\n') + result = gtanalysis.get_topn_indices(G2, 'vertex_pagerank', 5, id_col) + for n_id, n_label, pr in result: + writer.write('%s\t%s\t%f\n' % (n_id, n_label, pr)) + + if compute_hits: + writer.write('\n###HITS\n') + hits_eig, G2.vp['vertex_hubs'], G2.vp['vertex_auth'] = gtanalysis.compute_hits(G2) + writer.write('HITS hubs\n') + main_hubs = gtanalysis.get_topn_indices(G2, 'vertex_hubs', 5, id_col) + for n_id, n_label, hubness in main_hubs: + writer.write('%s\t%s\t%f\n' % (n_id, n_label, hubness)) + writer.write('HITS auth\n') + main_auth = gtanalysis.get_topn_indices(G2, 'vertex_auth', 5, id_col) + for n_id, n_label, authority in main_auth: + writer.write('%s\t%s\t%f\n' % (n_id, n_label, authority)) + + if not output_stats: + for e in G2.edges(): + sid, oid = e + lbl = G2.ep[predicate][e] + sys.stdout.write('THIS IS EDGES\n') + sys.stdout.write('%s\t%s\t%s\n' % (G2.vp[id_col][sid], lbl, G2.vp[id_col][oid])) + + for v in G2.vertices(): + v_id = G2.vp[id_col][v] + for vprop in G2.vertex_properties.keys(): + if vprop == id_col: continue + sys.stdout.write('%s\t%s\t%s\n' % (v_id, vprop, G2.vp[vprop][v])) + + if output: + writer.write('now saving the graph to %s\n' % output) + G2.save(output) + except Exception as e: + raise KGTKException('Error: ' + str(e)) From 3664ba0cf0baca6dc48ab38b39b0b7d09eada890 Mon Sep 17 00:00:00 2001 From: saggu Date: Fri, 1 May 2020 17:43:44 -0700 Subject: [PATCH 048/278] tabs v spaces --- kgtk/gt/analysis_utils.py | 77 +++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 31 deletions(-) diff --git a/kgtk/gt/analysis_utils.py b/kgtk/gt/analysis_utils.py index d2f3da9c1..c0f224084 100644 --- a/kgtk/gt/analysis_utils.py +++ b/kgtk/gt/analysis_utils.py @@ -1,33 +1,41 @@ import graph_tool as gtmain import graph_tool.all as gtall -import numpy as np # type: ignore +import numpy as np # type: ignore from collections import 
defaultdict -import matplotlib.pyplot as plt # type: ignore +import matplotlib.pyplot as plt # type: ignore + plt.rcParams.update({'font.size': 12}) -import seaborn as sns # type: ignore +import seaborn as sns # type: ignore + sns.set_style("whitegrid") + #### BASIC STATS #### def get_num_nodes(g): return g.num_vertices() + def get_num_edges(g): return g.num_edges() + #### DEGREES #### def compute_avg_node_degree(g, direction): return gtmain.stats.vertex_average(g, direction) + def compute_node_degree_hist(g, direction): return gtall.vertex_hist(g, direction, float_count=False) + def get_degree_maxn_counts(g, direction): return list(compute_node_degree_hist(g, direction)[0])[:10] + def plot_degrees(degrees, plottype='loglog', base=10, xlabel='', ylabel='', title=''): plt.loglog(degrees, basex=base, basey=base) plt.ylabel(ylabel) @@ -35,55 +43,62 @@ def plot_degrees(degrees, plottype='loglog', base=10, xlabel='', ylabel='', titl plt.title(title) plt.show() + #### CENTRALITY #### - + def compute_betweenness(g): - bn, be=gtmain.centrality.betweenness(g) + bn, be = gtmain.centrality.betweenness(g) return bn, be + def compute_pagerank(g): v_pr = g.new_vertex_property('float') gtmain.centrality.pagerank(g, prop=v_pr) return v_pr + def compute_hits(g): - hits_eig, v_hubs, v_auth=gtmain.centrality.hits(g) + hits_eig, v_hubs, v_auth = gtmain.centrality.hits(g) return hits_eig, v_hubs, v_auth - + + def get_max_node(g, prop): - max_pr=0.0 - max_pr_vertex=None + max_pr = 0.0 + max_pr_vertex = None for v in g.vertices(): - vertex_pr=g.vp[prop][v] - if vertex_pr>max_pr: - max_pr=vertex_pr - max_pr_vertex=g.vp['_graphml_vertex_id'][v] - + vertex_pr = g.vp[prop][v] + if vertex_pr > max_pr: + max_pr = vertex_pr + max_pr_vertex = g.vp['_graphml_vertex_id'][v] + return max_pr, max_pr_vertex + def get_topn_indices(g, prop, n, print_prop): - a=g.vp[prop].a - ind = np.argpartition(a, -n)[-n:] - result=[] - for i in ind: - result.append([i, g.vp[print_prop][i], g.vp[prop][i]]) - return result + a = g.vp[prop].a + ind = np.argpartition(a, -n)[-n:] + result = [] + for i in ind: + result.append([i, g.vp[print_prop][i], g.vp[prop][i]]) + return result + #### RUN ALL STATS #### - + def compute_stats(g, direction): - avg_degree, stdev_degree=compute_avg_node_degree(g, direction) + avg_degree, stdev_degree = compute_avg_node_degree(g, direction) return { - 'num_nodes': get_num_nodes(g), - 'num_edges': get_num_edges(g), - 'avg_degree': avg_degree, - 'degree_maxn_counts': get_degree_maxn_counts(g, direction), - 'stdev_degree': stdev_degree - } + 'num_nodes': get_num_nodes(g), + 'num_edges': get_num_edges(g), + 'avg_degree': avg_degree, + 'degree_maxn_counts': get_degree_maxn_counts(g, direction), + 'stdev_degree': stdev_degree + } + def get_topN_relations(g, N=10, pred_property='predicate'): - rel_freq=defaultdict(int) + rel_freq = defaultdict(int) for i, e in enumerate(g.edges()): - r=g.edge_properties[pred_property][e] - rel_freq[r]+=1 + r = g.edge_properties[pred_property][e] + rel_freq[r] += 1 return sorted(rel_freq.items(), key=lambda x: x[1], reverse=True)[:N] From 47126cfbc7a450b3d40558bd343efd52c38a468c Mon Sep 17 00:00:00 2001 From: saggu Date: Fri, 1 May 2020 18:32:15 -0700 Subject: [PATCH 049/278] add in and out degrees as well --- kgtk/cli/gt_loader.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kgtk/cli/gt_loader.py b/kgtk/cli/gt_loader.py index bfc5da2b9..c65514849 100644 --- a/kgtk/cli/gt_loader.py +++ b/kgtk/cli/gt_loader.py @@ -31,7 +31,6 @@ def add_arguments(parser): help='do 
not output the graph but statistics only') - def run(filename, directed, compute_degrees, compute_pagerank, compute_hits, log_file, output, output_stats): from kgtk.exceptions import KGTKException def infer_index(h, options=[]): @@ -116,15 +115,18 @@ def infer_predicate(h, options=[]): for n_id, n_label, authority in main_auth: writer.write('%s\t%s\t%f\n' % (n_id, n_label, authority)) + sys.stdout.write('node1\tproperty\tnode2\n') if not output_stats: for e in G2.edges(): sid, oid = e lbl = G2.ep[predicate][e] - sys.stdout.write('THIS IS EDGES\n') sys.stdout.write('%s\t%s\t%s\n' % (G2.vp[id_col][sid], lbl, G2.vp[id_col][oid])) for v in G2.vertices(): v_id = G2.vp[id_col][v] + + sys.stdout.write('{}\t{}\t{}\n'.format(v_id, 'vertex_in_degree', v.in_degree())) + sys.stdout.write('{}\t{}\t{}\n'.format(v_id, 'vertex_out_degree', v.out_degree())) for vprop in G2.vertex_properties.keys(): if vprop == id_col: continue sys.stdout.write('%s\t%s\t%s\n' % (v_id, vprop, G2.vp[vprop][v])) From a2ba02ae5658f1a5da4d2a253d1549e3671a9989 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Fri, 1 May 2020 22:33:55 -0700 Subject: [PATCH 050/278] Use pycountry instead of iso-639, pycountry is better maintained. --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 42c81a774..bc68dd469 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,4 +13,4 @@ etk==2.2.1 simplejson pyrallel.lib attrs -iso-639 +pycountry From f65d3db32f5e86bad974513ee229d487a55e1b37 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Sat, 2 May 2020 00:31:50 -0700 Subject: [PATCH 051/278] Use iso-639 and pycountry both. --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index bc68dd469..44d1e9524 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,3 +14,4 @@ simplejson pyrallel.lib attrs pycountry +iso-639 From 5df36217573556f02049b9466369c61b03f73c43 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Sat, 2 May 2020 00:34:54 -0700 Subject: [PATCH 052/278] Support allowing month 0 and day 0 in dates. Support ignoring internal quotes in strings and language qualified strings. Check for two-character language codes, three-character language codes, retired language codes, language group codes, and two-character language codes with suffixes (typically country or dialect, unchecked). --- kgtk/join/kgtkvalue.py | 118 ++++++++++++++++++++++++++++++++++------- 1 file changed, 98 insertions(+), 20 deletions(-) diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index cdf235316..85a3010da 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -7,7 +7,8 @@ from argparse import ArgumentParser import attr -from iso639 import languages # type: ignore +import iso639 # type: ignore +import pycountry # type: ignore import re import sys import typing @@ -18,7 +19,23 @@ class KgtkValue(KgtkFormat): value: str = attr.ib(validator=attr.validators.instance_of(str)) + allow_month_or_day_zero: bool = attr.ib(validator=attr.validators.instance_of(bool), default=True) + allow_additional_language_codes: bool = attr.ib(validator=attr.validators.instance_of(bool), default=True) + + # When allow_lax_strings is true, strings will be checked to see if they + # start and end with double quote ("), but we won't check if internal + # double quotes are escaped by backslash. 
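To make the lax/strict distinction concrete: the two string patterns introduced by this patch differ only in whether internal double quotes must be escaped. A small demonstration (the two regular expressions are copied from the patch; everything else is illustrative):

import re

lax_string_re = re.compile(r'^".*"$')
strict_string_re = re.compile(r'^"(?:[^"\\]|\\.)*"$')

sample = '"say "hi""'                                  # internal quotes, unescaped
print(bool(lax_string_re.match(sample)))               # True: only the outer quotes are checked
print(bool(strict_string_re.match(sample)))            # False: inner quotes must be backslash-escaped
print(bool(strict_string_re.match(r'"say \"hi\""')))   # True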
+ allow_lax_strings: bool = attr.ib(validator=attr.validators.instance_of(bool), default=True) + + # When allow_lax_lq_strings is true, language qualified strings will be + # checked to see if they start and end with single quote ('), but we won't + # check if internal single quotes are escaped by backslash. + allow_lax_lq_strings: bool = attr.ib(validator=attr.validators.instance_of(bool), default=True) + additional_language_codes: typing.List[str] = [ + "mo", # Retired, replaced by the codes for Romanian, but still appearing in wikidata. + ] + split_list_re: typing.Pattern = re.compile(r"(?<!\\)\|") def is_string(self, idx: typing.Optional[int] = None)->bool: v: str = self.get_item(idx) return v.startswith('"') - string_re: typing.Pattern = re.compile(r'^"(?:[^"]|\\.)*"$') + lax_string_re: typing.Pattern = re.compile(r'^".*"$') + strict_string_re: typing.Pattern = re.compile(r'^"(?:[^"\\]|\\.)*"$') def is_valid_string(self, idx: typing.Optional[int] = None)->bool: @@ -309,7 +327,11 @@ def is_valid_string(self, idx: typing.Optional[int] = None)->bool: v: str = self.get_item(idx) if not v.startswith('"'): return False - m: typing.Optional[typing.Match] = KgtkValue.string_re.match(v) + m: typing.Optional[typing.Match] + if self.allow_lax_strings: + m = KgtkValue.lax_string_re.match(v) + else: + m = KgtkValue.strict_string_re.match(v) return m is not None def is_structured_literal(self, idx: typing.Optional[int] = None)->bool: @@ -356,35 +378,84 @@ def is_language_qualified_string(self, idx: typing.Optional[int] = None)->bool: v: str = self.get_item(idx) return v.startswith("'") - # Support two or three character language codes. - language_qualified_string_re: typing.Pattern = re.compile(r"^(?P<text>'(?:[^']|\\.)*')@(?P<lang>[a-zA-Z]{2,3})$") + # Support two or three character language codes. Supports hyphenated codes + # with country codes or dialect names after a language code. + lax_language_qualified_string_re: typing.Pattern = re.compile(r"^(?P<text>'.*')@(?P<lang>[a-zA-Z]{2,3}(?:-[a-zA-Z]+)?)$") + strict_language_qualified_string_re: typing.Pattern = re.compile(r"^(?P<text>'(?:[^'\\]|\\.)*')@(?P<lang>[a-zA-Z]{2,3}(?:-[a-zA-Z]+)?)$") def is_valid_language_qualified_string(self, idx: typing.Optional[int] = None)->bool: - """ - Return False if this value is a list and idx is None. + """Return False if this value is a list and idx is None. Otherwise, return True if the value looks like a language-qualified string. + + The language code may be a two- or three-character code from ISO + 639-3, which replaces ISO 639-1 and ISO 639-2. In addition, wikidata + may include language codes, such as 'mo', that have been retired. The + additional_language_codes table supports these codes, when allowed. + + Wikidata may also contain collective language codes, such as "nah", + referring to the Nahuatl languages. These codes from ISO 639-5 are + accepted as a fallback when ISO 639-3 lookup fails. 
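The fallback chain described in this docstring can be sketched as a standalone function; the pycountry and iso639 calls below are the same ones the method body uses, but the wrapper itself is illustrative:

import iso639
import pycountry

def lang_code_known(lang: str) -> bool:
    lang = lang.lower()
    if len(lang) == 2:
        # Two-character codes: ISO 639-1 via pycountry.
        return pycountry.languages.get(alpha_2=lang) is not None
    if len(lang) == 3:
        # Three-character codes: try ISO 639-3 first.
        if pycountry.languages.get(alpha_3=lang) is not None:
            return True
        # Fall back to ISO 639-5 collective codes such as "nah".
        try:
            iso639.languages.get(part5=lang)
            return True
        except KeyError:
            return False
    return False

print(lang_code_known("en"), lang_code_known("nah"), lang_code_known("zzz"))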
+ + https://meta.wikimedia.org/wiki/Special_language_codes + https://en.wikipedia.org/wiki/Template:ISO_639_name_be-tarask + """ if self.is_list() and idx is None: return False v: str = self.get_item(idx) - m: typing.Optional[typing.Match] = KgtkValue.language_qualified_string_re.match(v) + # print("checking %s" % v) + m: typing.Optional[typing.Match] + if self.allow_lax_lq_strings: + m = KgtkValue.lax_language_qualified_string_re.match(v) + else: + m = KgtkValue.strict_language_qualified_string_re.match(v) if m is None: + # print("match failed for %s" % v) return False # Validate the language code: - lang: str = m.group("lang") + lang: str = m.group("lang").lower() # print("lang: %s" % lang) - try: - if len(lang) == 2: - # Two-character language codes. - languages.get(alpha2=lang.lower()) - else: - # Three-character language codes. - languages.get(bibliographic=lang.lower()) + + if len(lang) == 2: + # Two-character language codes. + if pycountry.languages.get(alpha_2=lang) is not None: + return True + + elif len(lang) == 3: + # Three-character language codes. + if pycountry.languages.get(alpha_3=lang) is not None: + return True + + # Perhaps this is a collective code from ISO 639-5? + try: + iso639.languages.get(part5=lang) + return True + except KeyError: + pass + + # Wikidata contains entries such as: + # 'panamenha'@pt-br # language code followed by country code + # 'Ecuador'@es-formal # language code followed by dialect name + # + # If we see a dash, we'll check the language code by itself. + save_lang: str = lang # for the debug print below. + country_or_dialect: str = "" + if "-" in lang: + (lang, country_or_dialect) = lang.split("-", 1) + + # Assume that this is a two-character code. If necessary, + # we can try three-character codes, too. + if pycountry.languages.get(alpha_2=lang) is not None: + return True + + # If there's a table of additional language codes, check there: + if self.allow_additional_language_codes and lang in self.additional_language_codes: return True - except KeyError: - return False + + print("save_lang: %s lang: %s country_or_dialect: %s" % (save_lang, lang, country_or_dialect)) + return False def is_location_coordinates(self, idx: typing.Optional[int] = None)->bool: """ @@ -449,7 +520,10 @@ def is_date_and_times(self, idx: typing.Optional[int] = None)->bool: v: str = self.get_item(idx) return v.startswith("^") - date_and_times_re: typing.Pattern = re.compile(r"^\^(?P[0-9]{4})(?:(?P-)?(?P1[0-2]|0[1-9])(?:(?(hyphen)-)(?P3[01]|0[1-9]|[12][0-9])))T(?P2[0-3]|[01][0-9])(?:(?(hyphen):)(?P[0-5][0-9])(?:(?(hyphen):)(?P[0-5][0-9])))(?PZ|\[-+][0-9][0-9](?::[0-9][0-9])?)?(?P/[0-1]?[0-9])?$") + # This pattern allows month 00 and day 00, which are excluded by ISO 8601. 
+ date_and_times_re: typing.Pattern = re.compile(r"^\^(?P[0-9]{4})(?:(?P-)?(?P1[0-2]|0[0-9])(?:(?(hyphen)-)(?P3[01]|0[0-9]|[12][0-9])))T(?P2[0-3]|[01][0-9])(?:(?(hyphen):)(?P[0-5][0-9])(?:(?(hyphen):)(?P[0-5][0-9])))(?PZ|\[-+][0-9][0-9](?::[0-9][0-9])?)?(?P/[0-1]?[0-9])?$") + + strict_date_and_times_re: typing.Pattern = re.compile(r"^\^(?P[0-9]{4})(?:(?P-)?(?P1[0-2]|0[1-9])(?:(?(hyphen)-)(?P3[01]|0[1-9]|[12][0-9])))T(?P2[0-3]|[01][0-9])(?:(?(hyphen):)(?P[0-5][0-9])(?:(?(hyphen):)(?P[0-5][0-9])))(?PZ|\[-+][0-9][0-9](?::[0-9][0-9])?)?(?P/[0-1]?[0-9])?$") def is_valid_date_and_times(self, idx: typing.Optional[int] = None)->bool: """ @@ -504,7 +578,11 @@ def is_valid_date_and_times(self, idx: typing.Optional[int] = None)->bool: return False v: str = self.get_item(idx) - m: typing.Optional[typing.Match] = KgtkValue.date_and_times_re.match(v) + m: typing.Optional[typing.Match] + if self.allow_month_or_day_zero: + m = KgtkValue.date_and_times_re.match(v) + else: + m = KgtkValue.strict_date_and_times_re.match(v) return m is not None def is_extension(self, idx: typing.Optional[int] = None)->bool: From 245edca1d4d7b5e8cbccf9c8bc3bfdea4853739d Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Sat, 2 May 2020 01:40:42 -0700 Subject: [PATCH 053/278] Support the value options on the command line. --- kgtk/cli/validate.py | 30 +++++++++++++++++++ kgtk/join/edgereader.py | 11 +++++++ kgtk/join/kgtkreader.py | 32 ++++++++++++++++++-- kgtk/join/kgtkvalue.py | 66 +++++++++++++++++++++++++++-------------- kgtk/join/nodereader.py | 11 +++++++ 5 files changed, 126 insertions(+), 24 deletions(-) diff --git a/kgtk/cli/validate.py b/kgtk/cli/validate.py index 3593ed784..c84a7fb6f 100644 --- a/kgtk/cli/validate.py +++ b/kgtk/cli/validate.py @@ -18,6 +18,7 @@ from kgtk.join.enumnameaction import EnumNameAction from kgtk.join.kgtkformat import KgtkFormat from kgtk.join.kgtkreader import KgtkReader +from kgtk.join.kgtkvalue import DEFAULT_ADDITIONAL_LANGUAGE_CODES, KgtkValueOptions from kgtk.join.validationaction import ValidationAction def parser(): @@ -33,6 +34,21 @@ def add_arguments(parser): parser (argparse.ArgumentParser) """ parser.add_argument( "kgtk_files", nargs="*", help="The KGTK file(s) to validate. 
May be omitted or '-' for stdin.", type=Path) + + parser.add_argument( "--additional-language-codes", dest="additional_language_codes", + help="Additional language codes.", nargs="*", default=DEFAULT_ADDITIONAL_LANGUAGE_CODES) + + parser.add_argument( "--allow-additional-language-codes", dest="allow_additional_language_codes", + help="Allow certain language codes not found in the current version of ISO 639-3 or ISO 639-5.", action='store_true') + + parser.add_argument( "--allow-lax-strings", dest="allow_lax_strings", + help="Do not check if double quotes are backslashed inside strings.", action='store_true') + + parser.add_argument( "--allow-lax-lq-strings", dest="allow_lax_lq_strings", + help="Do not check if single quotes are backslashed inside language qualified strings.", action='store_true') + + parser.add_argument( "--allow-month-or-day-zero", dest="allow_month_or_day_zero", + help="Allow month or day zero in dates.", action='store_true') parser.add_argument( "--blank-id-line-action", dest="blank_id_line_action", help="The action to take when a blank id field is detected.", @@ -143,6 +159,11 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], invalid_value_action: ValidationAction = ValidationAction.REPORT, header_error_action: ValidationAction = ValidationAction.EXIT, unsafe_column_name_action: ValidationAction = ValidationAction.REPORT, + additional_language_codes: typing.List[str] = DEFAULT_ADDITIONAL_LANGUAGE_CODES, + allow_additional_language_codes: bool = False, + allow_lax_strings: bool = False, + allow_lax_lq_strings: bool = False, + allow_month_or_day_zero: bool = False, compression_type: typing.Optional[str] = None, gzip_in_parallel: bool = False, gzip_queue_size: int = KgtkReader.GZIP_QUEUE_SIZE_DEFAULT, @@ -161,6 +182,14 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], # Select where to send error messages, defaulting to stderr. error_file: typing.TextIO = sys.stderr if errors_to_stderr else sys.stdout + # Build the value parsing option structure. 
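+    # The options are bundled once here, then shared by every KgtkReader
+    # opened below and by each KgtkValue those readers construct.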
+ value_options: KgtkValueOptions = KgtkValueOptions(allow_month_or_day_zero=allow_month_or_day_zero, + allow_lax_strings=allow_lax_strings, + allow_lax_lq_strings=allow_lax_lq_strings, + allow_additional_language_codes=allow_additional_language_codes, + additional_language_codes=additional_language_codes) + print("value_options.allow_month_or_day_zero = %s" % str(value_options.allow_month_or_day_zero)) + try: kgtk_file: typing.Optional[Path] for kgtk_file in kgtk_files: @@ -191,6 +220,7 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], header_error_action=header_error_action, unsafe_column_name_action=unsafe_column_name_action, compression_type=compression_type, + value_options=value_options, gzip_in_parallel=gzip_in_parallel, gzip_queue_size=gzip_queue_size, column_separator=column_separator, diff --git a/kgtk/join/edgereader.py b/kgtk/join/edgereader.py index b15c464dc..0bdb3d4fb 100644 --- a/kgtk/join/edgereader.py +++ b/kgtk/join/edgereader.py @@ -13,6 +13,7 @@ from kgtk.join.closableiter import ClosableIter from kgtk.join.enumnameaction import EnumNameAction from kgtk.join.kgtkreader import KgtkReader +from kgtk.join.kgtkvalue import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS from kgtk.join.validationaction import ValidationAction @attr.s(slots=True, frozen=False) @@ -37,6 +38,7 @@ def open_edge_file(cls, invalid_value_action: ValidationAction = ValidationAction.REPORT, header_error_action: ValidationAction = ValidationAction.EXIT, unsafe_column_name_action: ValidationAction = ValidationAction.REPORT, + value_options: KgtkValueOptions = DEFAULT_KGTK_VALUE_OPTIONS, compression_type: typing.Optional[str] = None, gzip_in_parallel: bool = False, gzip_queue_size: int = KgtkReader.GZIP_QUEUE_SIZE_DEFAULT, @@ -110,6 +112,7 @@ def open_edge_file(cls, invalid_value_action=invalid_value_action, header_error_action=header_error_action, unsafe_column_name_action=unsafe_column_name_action, + value_options=value_options, compression_type=compression_type, gzip_in_parallel=gzip_in_parallel, gzip_queue_size=gzip_queue_size, @@ -166,6 +169,13 @@ def main(): error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr + # Build the value parsing option structure. 
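+    # Here the settings come from the parsed command-line Namespace; the
+    # underlying flags are registered by KgtkReader.add_shared_arguments().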
+ value_options: KgtkValueOptions = KgtkValueOptions(allow_month_or_day_zero=args.allow_month_or_day_zero, + allow_lax_strings=args.allow_lax_strings, + allow_lax_lq_strings=args.allow_lax_lq_strings, + allow_additional_language_codes=args.allow_additional_language_codes, + additional_language_codes=args.additional_language_codes) + er: EdgeReader = EdgeReader.open(args.kgtk_file, force_column_names=args.force_column_names, skip_first_record=args.skip_first_record, @@ -183,6 +193,7 @@ def main(): invalid_value_action=args.invalid_value_action, header_error_action=args.header_error_action, unsafe_column_name_action=args.unsafe_column_name_action, + value_options=value_options, compression_type=args.compression_type, gzip_in_parallel=args.gzip_in_parallel, gzip_queue_size=args.gzip_queue_size, diff --git a/kgtk/join/kgtkreader.py b/kgtk/join/kgtkreader.py index 916a46abf..f0831c3d3 100644 --- a/kgtk/join/kgtkreader.py +++ b/kgtk/join/kgtkreader.py @@ -21,7 +21,7 @@ from kgtk.join.gzipprocess import GunzipProcess from kgtk.join.kgtkbase import KgtkBase from kgtk.join.kgtkformat import KgtkFormat -from kgtk.join.kgtkvalue import KgtkValue +from kgtk.join.kgtkvalue import KgtkValue, KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS, DEFAULT_ADDITIONAL_LANGUAGE_CODES from kgtk.join.validationaction import ValidationAction @attr.s(slots=True, frozen=False) @@ -83,6 +83,7 @@ class KgtkReader(KgtkBase, ClosableIter[typing.List[str]]): # Validate data cell values? invalid_value_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.REPORT) + value_options: KgtkValueOptions = attr.ib(validator=attr.validators.instance_of(KgtkValueOptions), default=DEFAULT_KGTK_VALUE_OPTIONS) # Repair records with too many or too few fields? fill_short_lines: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) @@ -130,6 +131,7 @@ def open(cls, invalid_value_action: ValidationAction = ValidationAction.REPORT, header_error_action: ValidationAction = ValidationAction.EXIT, unsafe_column_name_action: ValidationAction = ValidationAction.REPORT, + value_options: KgtkValueOptions = DEFAULT_KGTK_VALUE_OPTIONS, compression_type: typing.Optional[str] = None, gzip_in_parallel: bool = False, gzip_queue_size: int = GZIP_QUEUE_SIZE_DEFAULT, @@ -249,6 +251,7 @@ def open(cls, invalid_value_action=invalid_value_action, header_error_action=header_error_action, unsafe_column_name_action=unsafe_column_name_action, + value_options=value_options, compression_type=compression_type, gzip_in_parallel=gzip_in_parallel, gzip_queue_size=gzip_queue_size, @@ -303,6 +306,7 @@ def open(cls, invalid_value_action=invalid_value_action, header_error_action=header_error_action, unsafe_column_name_action=unsafe_column_name_action, + value_options=value_options, compression_type=compression_type, gzip_in_parallel=gzip_in_parallel, gzip_queue_size=gzip_queue_size, @@ -343,6 +347,7 @@ def open(cls, invalid_value_action=invalid_value_action, header_error_action=header_error_action, unsafe_column_name_action=unsafe_column_name_action, + value_options=value_options, compression_type=compression_type, gzip_in_parallel=gzip_in_parallel, gzip_queue_size=gzip_queue_size, @@ -593,7 +598,7 @@ def _ignore_invalid_values(self, values: typing.List[str], line: str)->bool: value: str for idx, value in enumerate(values): if len(value) > 0: # Optimize the common case of empty columns. 
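                # Wrap the cell in a KgtkValue so that validation honors
                # the options selected on the command line.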
- kv: KgtkValue = KgtkValue(value) + kv: KgtkValue = KgtkValue(value, options=self.value_options) if not kv.is_valid(): problems.append("col %d (%s) value '%s'is an %s" % (idx, self.column_names[idx], value, kv.describe())) @@ -653,6 +658,21 @@ def to_map(self, row: typing.List[str])->typing.Mapping[str, str]: def add_shared_arguments(cls, parser: ArgumentParser): parser.add_argument(dest="kgtk_file", help="The KGTK file to read", type=Path, nargs="?") + parser.add_argument( "--additional-language-codes", dest="additional_language_codes", + help="Additional language codes.", nargs="*", default=DEFAULT_ADDITIONAL_LANGUAGE_CODES) + + parser.add_argument( "--allow-additional-language-codes", dest="allow_additional_language_codes", + help="Allow certain language codes not found in the current version of ISO 639-3 or ISO 639-5.", action='store_true') + + parser.add_argument( "--allow-lax-strings", dest="allow_lax_strings", + help="Do not check if double quotes are backslashed inside strings.", action='store_true') + + parser.add_argument( "--allow-lax-lq-strings", dest="allow_lax_lq_strings", + help="Do not check if single quotes are backslashed inside language qualified strings.", action='store_true') + + parser.add_argument( "--allow-month-or-day-zero", dest="allow_month_or_day_zero", + help="Allow month or day zero in dates.", action='store_true') + parser.add_argument( "--blank-required-field-line-action", dest="blank_line_action", help="The action to take when a line with a blank node1, node2, or id field (per mode) is detected.", type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) @@ -745,6 +765,13 @@ def main(): error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr + # Build the value parsing option structure. + value_options: KgtkValueOptions = KgtkValueOptions(allow_month_or_day_zero=args.allow_month_or_day_zero, + allow_lax_strings=args.allow_lax_strings, + allow_lax_lq_strings=args.allow_lax_lq_strings, + allow_additional_language_codes=args.allow_additional_language_codes, + additional_language_codes=args.additional_language_codes) + kr: KgtkReader = KgtkReader.open(args.kgtk_file, force_column_names=args.force_column_names, skip_first_record=args.skip_first_record, @@ -764,6 +791,7 @@ def main(): invalid_value_action=args.invalid_value_action, header_error_action=args.header_error_action, unsafe_column_name_action=args.unsafe_column_name_action, + value_options=value_options, compression_type=args.compression_type, gzip_in_parallel=args.gzip_in_parallel, gzip_queue_size=args.gzip_queue_size, diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index 85a3010da..41dbd8b94 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -15,30 +15,51 @@ from kgtk.join.kgtkformat import KgtkFormat -@attr.s(slots=True, frozen=False) -class KgtkValue(KgtkFormat): - value: str = attr.ib(validator=attr.validators.instance_of(str)) +DEFAULT_ADDITIONAL_LANGUAGE_CODES: typing.List[str] = [ + "mo", # Retired, replaced by the codes for Romanian, but still appearing in wikidata. +] - allow_month_or_day_zero: bool = attr.ib(validator=attr.validators.instance_of(bool), default=True) - allow_additional_language_codes: bool = attr.ib(validator=attr.validators.instance_of(bool), default=True) + +@attr.s(slots=True, frozen=True) +class KgtkValueOptions: + """ + These options will affect some aspects of value processing. They are in a + seperate class for efficiency. + """ + + # Allow month 00 or day 00 in dates? 
This isn't really allowed by ISO + # 8601, but appears in wikidata. + allow_month_or_day_zero: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) # When allow_lax_strings is true, strings will be checked to see if they # start and end with double quote ("), but we won't check if internal # double quotes are excaped by backslash. - allow_lax_strings: bool = attr.ib(validator=attr.validators.instance_of(bool), default=True) + allow_lax_strings: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) # When allow_lax_lq_strings is true, language qualified strings will be # checked to see if they start and end with single quote ('), but we won't # check if internal single quotes are excaped by backslash. - allow_lax_lq_strings: bool = attr.ib(validator=attr.validators.instance_of(bool), default=True) + allow_lax_lq_strings: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) + + # Shall we allow additional language codes? + allow_additional_language_codes: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) + + # If this list gets long, we may want to turn it into a map to make lookup + # more efficient. + additional_language_codes: typing.List[str] = attr.ib(validator=attr.validators.deep_iterable(member_validator=attr.validators.instance_of(str), + iterable_validator=attr.validators.instance_of(list)), + default=DEFAULT_ADDITIONAL_LANGUAGE_CODES) - additional_language_codes: typing.List[str] = [ - "mo", # Retired, replaced by the codes for Romanian, but still appearing in wikidata. - ] +DEFAULT_KGTK_VALUE_OPTIONS: KgtkValueOptions = KgtkValueOptions() + +@attr.s(slots=True, frozen=False) +class KgtkValue(KgtkFormat): + value: str = attr.ib(validator=attr.validators.instance_of(str)) + options: KgtkValueOptions = attr.ib(validator=attr.validators.instance_of(KgtkValueOptions), default=DEFAULT_KGTK_VALUE_OPTIONS) split_list_re: typing.Pattern = re.compile(r"(?typing.List[str]: @@ -65,7 +86,7 @@ def get_values(self)->typing.List['KgtkValue']: result: typing.List['KgtkValue'] = [ ] v: str for v in self.get_list(): - result.append(KgtkValue(v)) + result.append(KgtkValue(v, options=self.options)) return result def is_empty(self, idx: typing.Optional[int] = None)->bool: @@ -328,7 +349,7 @@ def is_valid_string(self, idx: typing.Optional[int] = None)->bool: if not v.startswith('"'): return False m: typing.Optional[typing.Match] - if self.allow_lax_strings: + if self.options.allow_lax_strings: m = KgtkValue.lax_string_re.match(v) else: m = KgtkValue.strict_string_re.match(v) @@ -406,7 +427,7 @@ def is_valid_language_qualified_string(self, idx: typing.Optional[int] = None)-> v: str = self.get_item(idx) # print("checking %s" % v) m: typing.Optional[typing.Match] - if self.allow_lax_lq_strings: + if self.options.allow_lax_lq_strings: m = KgtkValue.lax_language_qualified_string_re.match(v) else: m = KgtkValue.strict_language_qualified_string_re.match(v) @@ -428,7 +449,7 @@ def is_valid_language_qualified_string(self, idx: typing.Optional[int] = None)-> if pycountry.languages.get(alpha_3=lang) is not None: return True - # Perhaps this is a collective code from ISO 639-5? + # Perhaps this is a collective (language family) code from ISO 639-5? try: iso639.languages.get(part5=lang) return True @@ -440,7 +461,7 @@ def is_valid_language_qualified_string(self, idx: typing.Optional[int] = None)-> # 'Ecuador'@es-formal # language code followed by dialect name # # If we see a dash, we'll check the language code by itself. 
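        # For example, "pt-br" is validated using just the base code "pt".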
- save_lang: str = lang # for the debug print below. + # save_lang: str = lang # for the debug print below. country_or_dialect: str = "" if "-" in lang: (lang, country_or_dialect) = lang.split("-", 1) @@ -448,13 +469,14 @@ def is_valid_language_qualified_string(self, idx: typing.Optional[int] = None)-> # Assume that this is a two-character code. If necessary, # we can try three-character codes, too. if pycountry.languages.get(alpha_2=lang) is not None: + # Note: we didn't check the country_or_dialect portion. return True # If there's a table of additional language codes, check there: - if self.allow_additional_language_codes and lang in self.additional_language_codes: + if self.options.allow_additional_language_codes and lang in self.options.additional_language_codes: return True - print("save_lang: %s lang: %s country_or_dialect: %s" % (save_lang, lang, country_or_dialect)) + # print("save_lang: %s lang: %s country_or_dialect: %s" % (save_lang, lang, country_or_dialect)) return False def is_location_coordinates(self, idx: typing.Optional[int] = None)->bool: @@ -521,7 +543,7 @@ def is_date_and_times(self, idx: typing.Optional[int] = None)->bool: return v.startswith("^") # This pattern allows month 00 and day 00, which are excluded by ISO 8601. - date_and_times_re: typing.Pattern = re.compile(r"^\^(?P[0-9]{4})(?:(?P-)?(?P1[0-2]|0[0-9])(?:(?(hyphen)-)(?P3[01]|0[0-9]|[12][0-9])))T(?P2[0-3]|[01][0-9])(?:(?(hyphen):)(?P[0-5][0-9])(?:(?(hyphen):)(?P[0-5][0-9])))(?PZ|\[-+][0-9][0-9](?::[0-9][0-9])?)?(?P/[0-1]?[0-9])?$") + lax_date_and_times_re: typing.Pattern = re.compile(r"^\^(?P[0-9]{4})(?:(?P-)?(?P1[0-2]|0[0-9])(?:(?(hyphen)-)(?P3[01]|0[0-9]|[12][0-9])))T(?P2[0-3]|[01][0-9])(?:(?(hyphen):)(?P[0-5][0-9])(?:(?(hyphen):)(?P[0-5][0-9])))(?PZ|\[-+][0-9][0-9](?::[0-9][0-9])?)?(?P/[0-1]?[0-9])?$") strict_date_and_times_re: typing.Pattern = re.compile(r"^\^(?P[0-9]{4})(?:(?P-)?(?P1[0-2]|0[1-9])(?:(?(hyphen)-)(?P3[01]|0[1-9]|[12][0-9])))T(?P2[0-3]|[01][0-9])(?:(?(hyphen):)(?P[0-5][0-9])(?:(?(hyphen):)(?P[0-5][0-9])))(?PZ|\[-+][0-9][0-9](?::[0-9][0-9])?)?(?P/[0-1]?[0-9])?$") @@ -579,8 +601,8 @@ def is_valid_date_and_times(self, idx: typing.Optional[int] = None)->bool: v: str = self.get_item(idx) m: typing.Optional[typing.Match] - if self.allow_month_or_day_zero: - m = KgtkValue.date_and_times_re.match(v) + if self.options.allow_month_or_day_zero: + m = KgtkValue.lax_date_and_times_re.match(v) else: m = KgtkValue.strict_date_and_times_re.match(v) return m is not None @@ -710,7 +732,7 @@ def main(): value: str for value in args.values: - print("%s: %s" % (value, KgtkValue(value).describe())) + print("%s: %s" % (value, KgtkValue(value).describe()), flush=True) if __name__ == "__main__": main() diff --git a/kgtk/join/nodereader.py b/kgtk/join/nodereader.py index 4b3de9587..668e8918a 100644 --- a/kgtk/join/nodereader.py +++ b/kgtk/join/nodereader.py @@ -13,6 +13,7 @@ from kgtk.join.closableiter import ClosableIter from kgtk.join.enumnameaction import EnumNameAction from kgtk.join.kgtkreader import KgtkReader +from kgtk.join.kgtkvalue import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS from kgtk.join.validationaction import ValidationAction @attr.s(slots=True, frozen=False) @@ -36,6 +37,7 @@ def open_node_file(cls, invalid_value_action: ValidationAction = ValidationAction.REPORT, header_error_action: ValidationAction = ValidationAction.EXIT, unsafe_column_name_action: ValidationAction = ValidationAction.REPORT, + value_options: KgtkValueOptions = DEFAULT_KGTK_VALUE_OPTIONS, compression_type: 
typing.Optional[str] = None, gzip_in_parallel: bool = False, gzip_queue_size: int = KgtkReader.GZIP_QUEUE_SIZE_DEFAULT, @@ -101,6 +103,7 @@ def open_node_file(cls, invalid_value_action=invalid_value_action, header_error_action=header_error_action, unsafe_column_name_action=unsafe_column_name_action, + value_options=value_options, compression_type=compression_type, gzip_in_parallel=gzip_in_parallel, gzip_queue_size=gzip_queue_size, @@ -144,6 +147,13 @@ def main(): error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr + # Build the value parsing option structure. + value_options: KgtkValueOptions = KgtkValueOptions(allow_month_or_day_zero=args.allow_month_or_day_zero, + allow_lax_strings=args.allow_lax_strings, + allow_lax_lq_strings=args.allow_lax_lq_strings, + allow_additional_language_codes=args.allow_additional_language_codes, + additional_language_codes=args.additional_language_codes) + er: NodeReader = NodeReader.open(args.kgtk_file, force_column_names=args.force_column_names, skip_first_record=args.skip_first_record, @@ -160,6 +170,7 @@ def main(): invalid_value_action=args.invalid_value_action, header_error_action=args.header_error_action, unsafe_column_name_action=args.unsafe_column_name_action, + value_options=value_options, compression_type=args.compression_type, gzip_in_parallel=args.gzip_in_parallel, gzip_queue_size=args.gzip_queue_size, From 443bdd2eca2629b9ab58cd0ac1f19ac5e1eeeb76 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Sat, 2 May 2020 01:45:03 -0700 Subject: [PATCH 054/278] Remove debug write. --- kgtk/cli/validate.py | 1 - 1 file changed, 1 deletion(-) diff --git a/kgtk/cli/validate.py b/kgtk/cli/validate.py index c84a7fb6f..7c28d38b7 100644 --- a/kgtk/cli/validate.py +++ b/kgtk/cli/validate.py @@ -188,7 +188,6 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], allow_lax_lq_strings=allow_lax_lq_strings, allow_additional_language_codes=allow_additional_language_codes, additional_language_codes=additional_language_codes) - print("value_options.allow_month_or_day_zero = %s" % str(value_options.allow_month_or_day_zero)) try: kgtk_file: typing.Optional[Path] From 44538259570bf25e1a3d067867e1c216ae7edce0 Mon Sep 17 00:00:00 2001 From: Naren Date: Sat, 2 May 2020 18:06:37 -0700 Subject: [PATCH 055/278] escape quotes when necessary --- kgtk/cli/import_ntriples.py | 2 +- kgtk/cli/import_wikidata.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/kgtk/cli/import_ntriples.py b/kgtk/cli/import_ntriples.py index 49f63a10d..49cabeb5a 100644 --- a/kgtk/cli/import_ntriples.py +++ b/kgtk/cli/import_ntriples.py @@ -111,7 +111,7 @@ def run(input_file, output_file, limit): if '@' in subject: str_parts = subject.split('@') final_value = '\'' + \ - str_parts[0] + '\'@' + str_parts[1] + str_parts[0].replace("'","\\'") + '\'@' + str_parts[1] else: final_value = '\"' + str(subject) + '\"' final_row.append(final_value) diff --git a/kgtk/cli/import_wikidata.py b/kgtk/cli/import_wikidata.py index 2c242f3e7..797bd3a90 100644 --- a/kgtk/cli/import_wikidata.py +++ b/kgtk/cli/import_wikidata.py @@ -160,7 +160,7 @@ def process(self,line,node_file,edge_file,qual_file,lang,doc_id): lang_label = labels.get(lang, None) if lang_label: row.append( - '\'' + lang_label['value'] + '\'' + "@" + lang) + '\'' + lang_label['value'].replace("'","\\'") + '\'' + "@" + lang) else: row.append("") else: @@ -173,7 +173,7 @@ def process(self,line,node_file,edge_file,qual_file,lang,doc_id): lang_descr = descriptions.get(lang, 
None) if lang_descr: row.append( - '\'' + lang_descr['value'] + '\'' + "@" + lang) + '\'' + lang_descr['value'].replace("'","\\'") + '\'' + "@" + lang) else: row.append("") else: @@ -187,7 +187,7 @@ def process(self,line,node_file,edge_file,qual_file,lang,doc_id): alias_list = [] for item in lang_aliases: alias_list.append( - '\'' + item['value'] + '\'' + "@" + lang) + '\'' + item['value'].replace("'","\\'") + '\'' + "@" + lang) row.append("|".join(alias_list)) else: row.append('') @@ -269,9 +269,9 @@ def process(self,line,node_file,edge_file,qual_file,lang,doc_id): val['time'][1:] + '/' + str(val['precision']) elif typ == 'monolingualtext': value = '\'' + \ - val['text'] + '\'' + '@' + val['language'] + val['text'].replace("'","\\'") + '\'' + '@' + val['language'] else: - value = '\"' + val + '\"' + value = '\"' + val.replace('"','\\"') + '\"' if edge_file: erows.append([sid, qnode, @@ -355,9 +355,9 @@ def process(self,line,node_file,edge_file,qual_file,lang,doc_id): val['time'][1:] + '/' + str(val['precision']) elif typ == 'monolingualtext': value = '\'' + \ - val['text'] + '\'' + '@' + val['language'] + val['text'].replace("'","\\'") + '\'' + '@' + val['language'] else: - value = '\"' + val + '\"' + value = '\"' + val.replace('"','\\"') + '\"' qrows.append( [ tempid, From a5032e5c36557d3e836b47e9bef0a5090ccdd469 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Sun, 3 May 2020 10:33:30 -0700 Subject: [PATCH 056/278] Check for three-character language codes with suffixes. --- kgtk/join/kgtkvalue.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index 41dbd8b94..a20fe47f2 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -466,11 +466,23 @@ def is_valid_language_qualified_string(self, idx: typing.Optional[int] = None)-> if "-" in lang: (lang, country_or_dialect) = lang.split("-", 1) - # Assume that this is a two-character code. If necessary, - # we can try three-character codes, too. - if pycountry.languages.get(alpha_2=lang) is not None: - # Note: we didn't check the country_or_dialect portion. + # TODO: refactor so this code isn't duplicated? + if len(lang) == 2: + # Two-character language codes. + if pycountry.languages.get(alpha_2=lang) is not None: + return True + + elif len(lang) == 3: + # Three-character language codes. + if pycountry.languages.get(alpha_3=lang) is not None: + return True + + # Perhaps this is a collective (language family) code from ISO 639-5? 
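+                # e.g. "nah", which names the Nahuatl language family.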
+ try: + iso639.languages.get(part5=lang) return True + except KeyError: + pass # If there's a table of additional language codes, check there: if self.options.allow_additional_language_codes and lang in self.options.additional_language_codes: From 1045c18f56630669fcb591efaf9fe67cf6663e1b Mon Sep 17 00:00:00 2001 From: Naren Date: Sun, 3 May 2020 14:48:01 -0700 Subject: [PATCH 057/278] escape | character --- kgtk/cli/import_wikidata.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kgtk/cli/import_wikidata.py b/kgtk/cli/import_wikidata.py index 797bd3a90..abf623746 100644 --- a/kgtk/cli/import_wikidata.py +++ b/kgtk/cli/import_wikidata.py @@ -56,7 +56,7 @@ def add_arguments(parser): type=str, dest="lang", default="en", - help='language to extract, default en') + help='languages to extract, comma separated, default en') parser.add_argument( "--source", action="store", @@ -159,6 +159,7 @@ def process(self,line,node_file,edge_file,qual_file,lang,doc_id): if labels: lang_label = labels.get(lang, None) if lang_label: + lang_label['value']=lang_label['value'].replace('|','\\|') row.append( '\'' + lang_label['value'].replace("'","\\'") + '\'' + "@" + lang) else: @@ -172,6 +173,7 @@ def process(self,line,node_file,edge_file,qual_file,lang,doc_id): if descriptions: lang_descr = descriptions.get(lang, None) if lang_descr: + lang_descr['value']=lang_descr['value'].replace('|','\\|') row.append( '\'' + lang_descr['value'].replace("'","\\'") + '\'' + "@" + lang) else: @@ -186,6 +188,7 @@ def process(self,line,node_file,edge_file,qual_file,lang,doc_id): if lang_aliases: alias_list = [] for item in lang_aliases: + item['value']=item['value'].replace('|','\\|') alias_list.append( '\'' + item['value'].replace("'","\\'") + '\'' + "@" + lang) row.append("|".join(alias_list)) From 4d8d015f282a8554d20d787f52434467c240c6f2 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Sun, 3 May 2020 17:13:28 -0700 Subject: [PATCH 058/278] Refactor the KgtkValueOptions ArgumentParser arguments. --- kgtk/cli/validate.py | 19 ++++--------------- kgtk/join/edgereader.py | 7 ++----- kgtk/join/kgtkreader.py | 24 +++--------------------- kgtk/join/kgtkvalue.py | 40 ++++++++++++++++++++++++++++++++++++---- kgtk/join/nodereader.py | 7 ++----- 5 files changed, 47 insertions(+), 50 deletions(-) diff --git a/kgtk/cli/validate.py b/kgtk/cli/validate.py index 7c28d38b7..470304e0f 100644 --- a/kgtk/cli/validate.py +++ b/kgtk/cli/validate.py @@ -35,21 +35,6 @@ def add_arguments(parser): """ parser.add_argument( "kgtk_files", nargs="*", help="The KGTK file(s) to validate. 
May be omitted or '-' for stdin.", type=Path) - parser.add_argument( "--additional-language-codes", dest="additional_language_codes", - help="Additional language codes.", nargs="*", default=DEFAULT_ADDITIONAL_LANGUAGE_CODES) - - parser.add_argument( "--allow-additional-language-codes", dest="allow_additional_language_codes", - help="Allow certain language codes not found in the current version of ISO 639-3 or ISO 639-5.", action='store_true') - - parser.add_argument( "--allow-lax-strings", dest="allow_lax_strings", - help="Do not check if double quotes are backslashed inside strings.", action='store_true') - - parser.add_argument( "--allow-lax-lq-strings", dest="allow_lax_lq_strings", - help="Do not check if single quotes are backslashed inside language qualified strings.", action='store_true') - - parser.add_argument( "--allow-month-or-day-zero", dest="allow_month_or_day_zero", - help="Allow month or day zero in dates.", action='store_true') - parser.add_argument( "--blank-id-line-action", dest="blank_id_line_action", help="The action to take when a blank id field is detected.", type=ValidationAction, action=EnumNameAction, default=None) @@ -138,6 +123,10 @@ def add_arguments(parser): help="The action to take when a whitespace line is detected.", type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + # Note: Any arguments described by KgtkValueOptions.add_arguments(...) + # need to be included in the arguments to run(...), below. + KgtkValueOptions.add_arguments(parser) + def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], force_column_names: typing.Optional[typing.List[str]] = None, diff --git a/kgtk/join/edgereader.py b/kgtk/join/edgereader.py index 0bdb3d4fb..4b8865640 100644 --- a/kgtk/join/edgereader.py +++ b/kgtk/join/edgereader.py @@ -165,16 +165,13 @@ def main(): parser = ArgumentParser() KgtkReader.add_shared_arguments(parser) EdgeReader.add_arguments(parser) + KgtkValueOptions.add_arguments(parser) args = parser.parse_args() error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr # Build the value parsing option structure. 
- value_options: KgtkValueOptions = KgtkValueOptions(allow_month_or_day_zero=args.allow_month_or_day_zero, - allow_lax_strings=args.allow_lax_strings, - allow_lax_lq_strings=args.allow_lax_lq_strings, - allow_additional_language_codes=args.allow_additional_language_codes, - additional_language_codes=args.additional_language_codes) + value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) er: EdgeReader = EdgeReader.open(args.kgtk_file, force_column_names=args.force_column_names, diff --git a/kgtk/join/kgtkreader.py b/kgtk/join/kgtkreader.py index f0831c3d3..c6518daa0 100644 --- a/kgtk/join/kgtkreader.py +++ b/kgtk/join/kgtkreader.py @@ -21,7 +21,7 @@ from kgtk.join.gzipprocess import GunzipProcess from kgtk.join.kgtkbase import KgtkBase from kgtk.join.kgtkformat import KgtkFormat -from kgtk.join.kgtkvalue import KgtkValue, KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS, DEFAULT_ADDITIONAL_LANGUAGE_CODES +from kgtk.join.kgtkvalue import KgtkValue, KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS from kgtk.join.validationaction import ValidationAction @attr.s(slots=True, frozen=False) @@ -658,21 +658,6 @@ def to_map(self, row: typing.List[str])->typing.Mapping[str, str]: def add_shared_arguments(cls, parser: ArgumentParser): parser.add_argument(dest="kgtk_file", help="The KGTK file to read", type=Path, nargs="?") - parser.add_argument( "--additional-language-codes", dest="additional_language_codes", - help="Additional language codes.", nargs="*", default=DEFAULT_ADDITIONAL_LANGUAGE_CODES) - - parser.add_argument( "--allow-additional-language-codes", dest="allow_additional_language_codes", - help="Allow certain language codes not found in the current version of ISO 639-3 or ISO 639-5.", action='store_true') - - parser.add_argument( "--allow-lax-strings", dest="allow_lax_strings", - help="Do not check if double quotes are backslashed inside strings.", action='store_true') - - parser.add_argument( "--allow-lax-lq-strings", dest="allow_lax_lq_strings", - help="Do not check if single quotes are backslashed inside language qualified strings.", action='store_true') - - parser.add_argument( "--allow-month-or-day-zero", dest="allow_month_or_day_zero", - help="Allow month or day zero in dates.", action='store_true') - parser.add_argument( "--blank-required-field-line-action", dest="blank_line_action", help="The action to take when a line with a blank node1, node2, or id field (per mode) is detected.", type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) @@ -761,16 +746,13 @@ def main(): KgtkReader.add_arguments(parser) EdgeReader.add_arguments(parser) NodeReader.add_arguments(parser) + KgtkValueOptions.add_arguments(parser) args = parser.parse_args() error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr # Build the value parsing option structure. 
- value_options: KgtkValueOptions = KgtkValueOptions(allow_month_or_day_zero=args.allow_month_or_day_zero, - allow_lax_strings=args.allow_lax_strings, - allow_lax_lq_strings=args.allow_lax_lq_strings, - allow_additional_language_codes=args.allow_additional_language_codes, - additional_language_codes=args.additional_language_codes) + value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) kr: KgtkReader = KgtkReader.open(args.kgtk_file, force_column_names=args.force_column_names, diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index a20fe47f2..bbfa8c3b5 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -5,7 +5,7 @@ """ -from argparse import ArgumentParser +from argparse import ArgumentParser, Namespace import attr import iso639 # type: ignore import pycountry # type: ignore @@ -50,6 +50,33 @@ class KgtkValueOptions: iterable_validator=attr.validators.instance_of(list)), default=DEFAULT_ADDITIONAL_LANGUAGE_CODES) + + @classmethod + def add_arguments(cls, parser: ArgumentParser): + parser.add_argument( "--additional-language-codes", dest="additional_language_codes", + help="Additional language codes.", nargs="*", default=DEFAULT_ADDITIONAL_LANGUAGE_CODES) + + parser.add_argument( "--allow-additional-language-codes", dest="allow_additional_language_codes", + help="Allow certain language codes not found in the current version of ISO 639-3 or ISO 639-5.", action='store_true') + + parser.add_argument( "--allow-lax-strings", dest="allow_lax_strings", + help="Do not check if double quotes are backslashed inside strings.", action='store_true') + + parser.add_argument( "--allow-lax-lq-strings", dest="allow_lax_lq_strings", + help="Do not check if single quotes are backslashed inside language qualified strings.", action='store_true') + + parser.add_argument( "--allow-month-or-day-zero", dest="allow_month_or_day_zero", + help="Allow month or day zero in dates.", action='store_true') + + @classmethod + # Build the value parsing option structure. + def from_args(cls, args: Namespace)->'KgtkValueOptions': + return cls(allow_month_or_day_zero=args.allow_month_or_day_zero, + allow_lax_strings=args.allow_lax_strings, + allow_lax_lq_strings=args.allow_lax_lq_strings, + allow_additional_language_codes=args.allow_additional_language_codes, + additional_language_codes=args.additional_language_codes) + DEFAULT_KGTK_VALUE_OPTIONS: KgtkValueOptions = KgtkValueOptions() @attr.s(slots=True, frozen=False) @@ -732,19 +759,24 @@ def describe(self, idx: typing.Optional[int] = None)->str: else: return "Symbol" + def main(): """ Test the KGTK value vparser. """ - parser = ArgumentParser() + parser: ArgumentParser = ArgumentParser() parser.add_argument(dest="values", help="The values(s) to test", type=str, nargs="+") parser.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true') parser.add_argument( "--very-verbose", dest="very_verbose", help="Print additional progress messages.", action='store_true') - args = parser.parse_args() + KgtkValueOptions.add_arguments(parser) + args: Namespace = parser.parse_args() + + # Build the value parsing option structure. 
+ value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) value: str for value in args.values: - print("%s: %s" % (value, KgtkValue(value).describe()), flush=True) + print("%s: %s" % (value, KgtkValue(value, options=value_options).describe()), flush=True) if __name__ == "__main__": main() diff --git a/kgtk/join/nodereader.py b/kgtk/join/nodereader.py index 668e8918a..9b50fea02 100644 --- a/kgtk/join/nodereader.py +++ b/kgtk/join/nodereader.py @@ -143,16 +143,13 @@ def main(): parser = ArgumentParser() KgtkReader.add_shared_arguments(parser) NodeReader.add_arguments(parser) + KgtkValueOptions.add_arguments(parser) args = parser.parse_args() error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr # Build the value parsing option structure. - value_options: KgtkValueOptions = KgtkValueOptions(allow_month_or_day_zero=args.allow_month_or_day_zero, - allow_lax_strings=args.allow_lax_strings, - allow_lax_lq_strings=args.allow_lax_lq_strings, - allow_additional_language_codes=args.allow_additional_language_codes, - additional_language_codes=args.additional_language_codes) + value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) er: NodeReader = NodeReader.open(args.kgtk_file, force_column_names=args.force_column_names, From 6d94e179861ec5c23d364f1096156b81d2d4f4fc Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Sun, 3 May 2020 18:21:51 -0700 Subject: [PATCH 059/278] Refactor the language validation code. Rely on the default list of additional language codes. --- kgtk/cli/validate.py | 6 +- kgtk/join/kgtkvalue.py | 81 +++------------------- kgtk/join/languagevalidator.py | 123 +++++++++++++++++++++++++++++++++ 3 files changed, 133 insertions(+), 77 deletions(-) create mode 100644 kgtk/join/languagevalidator.py diff --git a/kgtk/cli/validate.py b/kgtk/cli/validate.py index 470304e0f..db46d66ee 100644 --- a/kgtk/cli/validate.py +++ b/kgtk/cli/validate.py @@ -18,7 +18,7 @@ from kgtk.join.enumnameaction import EnumNameAction from kgtk.join.kgtkformat import KgtkFormat from kgtk.join.kgtkreader import KgtkReader -from kgtk.join.kgtkvalue import DEFAULT_ADDITIONAL_LANGUAGE_CODES, KgtkValueOptions +from kgtk.join.kgtkvalue import KgtkValueOptions from kgtk.join.validationaction import ValidationAction def parser(): @@ -148,8 +148,7 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], invalid_value_action: ValidationAction = ValidationAction.REPORT, header_error_action: ValidationAction = ValidationAction.EXIT, unsafe_column_name_action: ValidationAction = ValidationAction.REPORT, - additional_language_codes: typing.List[str] = DEFAULT_ADDITIONAL_LANGUAGE_CODES, - allow_additional_language_codes: bool = False, + additional_language_codes: typing.Optional[typing.List[str]] = None, allow_lax_strings: bool = False, allow_lax_lq_strings: bool = False, allow_month_or_day_zero: bool = False, @@ -175,7 +174,6 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], value_options: KgtkValueOptions = KgtkValueOptions(allow_month_or_day_zero=allow_month_or_day_zero, allow_lax_strings=allow_lax_strings, allow_lax_lq_strings=allow_lax_lq_strings, - allow_additional_language_codes=allow_additional_language_codes, additional_language_codes=additional_language_codes) try: diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index bbfa8c3b5..fd6aef315 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -1,24 +1,15 @@ """ Validate KGTK File data types. - -Dimensioned quantities are not supported. 
- """ from argparse import ArgumentParser, Namespace import attr -import iso639 # type: ignore -import pycountry # type: ignore import re import sys import typing from kgtk.join.kgtkformat import KgtkFormat - -DEFAULT_ADDITIONAL_LANGUAGE_CODES: typing.List[str] = [ - "mo", # Retired, replaced by the codes for Romanian, but still appearing in wikidata. -] - +from kgtk.join.languagevalidator import LanguageValidator @attr.s(slots=True, frozen=True) class KgtkValueOptions: @@ -41,23 +32,17 @@ class KgtkValueOptions: # check if internal single quotes are excaped by backslash. allow_lax_lq_strings: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) - # Shall we allow additional language codes? - allow_additional_language_codes: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) - # If this list gets long, we may want to turn it into a map to make lookup # more efficient. - additional_language_codes: typing.List[str] = attr.ib(validator=attr.validators.deep_iterable(member_validator=attr.validators.instance_of(str), - iterable_validator=attr.validators.instance_of(list)), - default=DEFAULT_ADDITIONAL_LANGUAGE_CODES) + additional_language_codes: typing.Optional[typing.List[str]] = attr.ib(validator=attr.validators.deep_iterable(member_validator=attr.validators.instance_of(str), + iterable_validator=attr.validators.instance_of(list)), + default=None) @classmethod def add_arguments(cls, parser: ArgumentParser): parser.add_argument( "--additional-language-codes", dest="additional_language_codes", - help="Additional language codes.", nargs="*", default=DEFAULT_ADDITIONAL_LANGUAGE_CODES) - - parser.add_argument( "--allow-additional-language-codes", dest="allow_additional_language_codes", - help="Allow certain language codes not found in the current version of ISO 639-3 or ISO 639-5.", action='store_true') + help="Additional language codes.", nargs="*", default=None) parser.add_argument( "--allow-lax-strings", dest="allow_lax_strings", help="Do not check if double quotes are backslashed inside strings.", action='store_true') @@ -74,7 +59,6 @@ def from_args(cls, args: Namespace)->'KgtkValueOptions': return cls(allow_month_or_day_zero=args.allow_month_or_day_zero, allow_lax_strings=args.allow_lax_strings, allow_lax_lq_strings=args.allow_lax_lq_strings, - allow_additional_language_codes=args.allow_additional_language_codes, additional_language_codes=args.additional_language_codes) DEFAULT_KGTK_VALUE_OPTIONS: KgtkValueOptions = KgtkValueOptions() @@ -466,57 +450,8 @@ def is_valid_language_qualified_string(self, idx: typing.Optional[int] = None)-> lang: str = m.group("lang").lower() # print("lang: %s" % lang) - if len(lang) == 2: - # Two-character language codes. - if pycountry.languages.get(alpha_2=lang) is not None: - return True - - elif len(lang) == 3: - # Three-character language codes. - if pycountry.languages.get(alpha_3=lang) is not None: - return True - - # Perhaps this is a collective (language family) code from ISO 639-5? - try: - iso639.languages.get(part5=lang) - return True - except KeyError: - pass - - # Wikidata contains entries such as: - # 'panamenha'@pt-br # language code followed by country code - # 'Ecuador'@es-formal # language code followed by dialect name - # - # If we see a dash, we'll check the language code by itself. - # save_lang: str = lang # for the debug print below. - country_or_dialect: str = "" - if "-" in lang: - (lang, country_or_dialect) = lang.split("-", 1) - - # TODO: refactor so this code isn't duplicated? 
- if len(lang) == 2: - # Two-character language codes. - if pycountry.languages.get(alpha_2=lang) is not None: - return True - - elif len(lang) == 3: - # Three-character language codes. - if pycountry.languages.get(alpha_3=lang) is not None: - return True - - # Perhaps this is a collective (language family) code from ISO 639-5? - try: - iso639.languages.get(part5=lang) - return True - except KeyError: - pass - - # If there's a table of additional language codes, check there: - if self.options.allow_additional_language_codes and lang in self.options.additional_language_codes: - return True - - # print("save_lang: %s lang: %s country_or_dialect: %s" % (save_lang, lang, country_or_dialect)) - return False + return LanguageValidator.validate(lang, + additional_language_codes=self.options.additional_language_codes) def is_location_coordinates(self, idx: typing.Optional[int] = None)->bool: """ @@ -762,7 +697,7 @@ def describe(self, idx: typing.Optional[int] = None)->str: def main(): """ - Test the KGTK value vparser. + Test the KGTK value parser. """ parser: ArgumentParser = ArgumentParser() parser.add_argument(dest="values", help="The values(s) to test", type=str, nargs="+") diff --git a/kgtk/join/languagevalidator.py b/kgtk/join/languagevalidator.py new file mode 100644 index 000000000..771859d71 --- /dev/null +++ b/kgtk/join/languagevalidator.py @@ -0,0 +1,123 @@ +""" +Validate language qualifiers. +""" + +from argparse import ArgumentParser, Namespace +import attr +import iso639 # type: ignore +import pycountry # type: ignore +import re +import sys +import typing + +# Problem: pycountry incorporates the Debian team's ISO 639-3 table, +# which as of 03-May-2020 has not been updated in four years! +# Meanwhile, iso639 (from pypi iso-639) has an ISO 639-3 table +# from 2015-05-05. +# +# https://salsa.debian.org/iso-codes-team/iso-codes/-/blob/master/iso_639-3/iso_639_3.tab +# https://pypi.org/project/iso-639/ +# +# Problem: Wikidata may contain obsolete language codes which have been +# removed from the standard indices. +# +# Example: "mo" +# +# Solution: We will keep a list of additional language codes. +@attr.s(slots=True, frozen=True) +class LanguageValidator: + + DEFAULT_ADDITIONAL_LANGUAGE_CODES: typing.List[str] = [ + # New codes: + "cnr", # Montenegrin. Added 21-Dec-2017. https://iso639-3.sil.org/code/cnr + "hyw", # Wester Armenian. Added 23-Jan-2018. https://iso639-3.sil.org/code/hyw + + # Obsolete codes: + "mo", # Retired, replaced by the codes for Romanian, but still appearing in wikidata. + "eml", # Emiliano-Romagnolo. Split and retired 16-Jan-2009. https://iso639-3.sil.org/code/eml + ] + + @classmethod + def validate(cls, + lang: str, + additional_language_codes: typing.Optional[typing.List[str]]=None, + verbose: bool = False, + )->bool: + # Wikidata contains entries such as: + # 'panamenha'@pt-br # language code followed by country code + # 'Ecuador'@es-formal # language code followed by dialect name + # + # If we see a dash, we'll check the language code by itself. + if verbose: + print("Validating '%s'" % lang) + + save_lang: str = lang # for the debug prints below. + country_or_dialect: str = "" + if "-" in lang: + (lang, country_or_dialect) = lang.split("-", 1) + if verbose: + print("'%s' split into '%s' and '%s'" % (save_lang, lang, country_or_dialect)) + + if len(lang) == 2: + # Two-character language codes. 
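+            # e.g. "en" or "fr"; pycountry resolves these as ISO 639-1 codes.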
+ if pycountry.languages.get(alpha_2=lang) is not None: + if verbose: + print("pycountry.languages.get(alpha_2=lang) succeeded") + return True + + elif len(lang) == 3: + # Three-character language codes. + if pycountry.languages.get(alpha_3=lang) is not None: + if verbose: + print("pycountry.languages.get(alpha_3=lang) succeeded") + return True + + # Perhaps this is a collective (language family) code from ISO 639-5? + try: + iso639.languages.get(part5=lang) + if verbose: + print("iso639.languages.get(part5=lang) succeeded") + return True + except KeyError: + pass + + # If there's a table of additional language codes, check there: + if additional_language_codes is None: + if verbose: + print("Using the default list of additional language codes.") + additional_language_codes = LanguageValidator.DEFAULT_ADDITIONAL_LANGUAGE_CODES + else: + if verbose: + print("Using a custom list of %d additional language codes." % len(additional_language_codes)) + if lang in additional_language_codes: + if verbose: + print("found in the table of additional languages.") + return True + + if verbose: + print("Not found.") + return False + +def main(): + """ + Test the language validator. + """ + parser: ArgumentParser = ArgumentParser() + parser.add_argument(dest="values", help="The values(s) to test", type=str, nargs="+") + + parser.add_argument( "--additional-language-codes", dest="additional_language_codes", + help="Additional language codes.", nargs="*", default=None) + + parser.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true') + args: Namespace = parser.parse_args() + + value: str + for value in args.values: + result: bool = LanguageValidator.validate(value, + additional_language_codes=args.additional_language_codes, + verbose=args.verbose) + + print("%s: %s" % (value, str(result)), flush=True) + +if __name__ == "__main__": + main() From 4423b714b26e67d828af8e9c193d459dbe6e0b24 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Sun, 3 May 2020 18:24:58 -0700 Subject: [PATCH 060/278] The additional language codes list is not optional in the value options. --- kgtk/join/kgtkvalue.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index fd6aef315..9d95418b8 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -34,8 +34,8 @@ class KgtkValueOptions: # If this list gets long, we may want to turn it into a map to make lookup # more efficient. - additional_language_codes: typing.Optional[typing.List[str]] = attr.ib(validator=attr.validators.deep_iterable(member_validator=attr.validators.instance_of(str), - iterable_validator=attr.validators.instance_of(list)), + additional_language_codes: typing.Optional[typing.List[str]] = attr.ib(validator=attr.validators.optional(attr.validators.deep_iterable(member_validator=attr.validators.instance_of(str), + iterable_validator=attr.validators.instance_of(list))), default=None) From 4c04282e924b88a66c90487b3c49150769716964 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Sun, 3 May 2020 19:25:19 -0700 Subject: [PATCH 061/278] Use a language validator. Control it by value options. 
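
As a rough sketch of how the refactored pieces are meant to compose (the
sample values below are illustrative, not taken from the test suite):

    from kgtk.join.kgtkvalue import KgtkValue
    from kgtk.join.kgtkvalueoptions import KgtkValueOptions

    # Wikidata-friendly settings: tolerate month/day 00 in dates and
    # unescaped double quotes inside strings.
    options = KgtkValueOptions(allow_month_or_day_zero=True,
                               allow_lax_strings=True)

    for v in ['"plain string"', "'oiseau'@fr", "^1960-00-00T00:00:00"]:
        print(v, KgtkValue(v, options=options).is_valid())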
--- kgtk/join/edgereader.py | 2 +- kgtk/join/kgtkreader.py | 3 +- kgtk/join/kgtkvalue.py | 69 +----------------------------- kgtk/join/kgtkvalueoptions.py | 77 ++++++++++++++++++++++++++++++++++ kgtk/join/languagevalidator.py | 33 +++++++++++---- kgtk/join/nodereader.py | 2 +- 6 files changed, 108 insertions(+), 78 deletions(-) create mode 100644 kgtk/join/kgtkvalueoptions.py diff --git a/kgtk/join/edgereader.py b/kgtk/join/edgereader.py index 4b8865640..0d687988e 100644 --- a/kgtk/join/edgereader.py +++ b/kgtk/join/edgereader.py @@ -13,7 +13,7 @@ from kgtk.join.closableiter import ClosableIter from kgtk.join.enumnameaction import EnumNameAction from kgtk.join.kgtkreader import KgtkReader -from kgtk.join.kgtkvalue import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS +from kgtk.join.kgtkvalueoptions import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS from kgtk.join.validationaction import ValidationAction @attr.s(slots=True, frozen=False) diff --git a/kgtk/join/kgtkreader.py b/kgtk/join/kgtkreader.py index c6518daa0..326b905bf 100644 --- a/kgtk/join/kgtkreader.py +++ b/kgtk/join/kgtkreader.py @@ -21,7 +21,8 @@ from kgtk.join.gzipprocess import GunzipProcess from kgtk.join.kgtkbase import KgtkBase from kgtk.join.kgtkformat import KgtkFormat -from kgtk.join.kgtkvalue import KgtkValue, KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS +from kgtk.join.kgtkvalue import KgtkValue +from kgtk.join.kgtkvalueoptions import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS from kgtk.join.validationaction import ValidationAction @attr.s(slots=True, frozen=False) diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index 9d95418b8..677bcdd51 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -9,60 +9,9 @@ import typing from kgtk.join.kgtkformat import KgtkFormat +from kgtk.join.kgtkvalueoptions import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS from kgtk.join.languagevalidator import LanguageValidator -@attr.s(slots=True, frozen=True) -class KgtkValueOptions: - """ - These options will affect some aspects of value processing. They are in a - seperate class for efficiency. - """ - - # Allow month 00 or day 00 in dates? This isn't really allowed by ISO - # 8601, but appears in wikidata. - allow_month_or_day_zero: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) - - # When allow_lax_strings is true, strings will be checked to see if they - # start and end with double quote ("), but we won't check if internal - # double quotes are excaped by backslash. - allow_lax_strings: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) - - # When allow_lax_lq_strings is true, language qualified strings will be - # checked to see if they start and end with single quote ('), but we won't - # check if internal single quotes are excaped by backslash. - allow_lax_lq_strings: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) - - # If this list gets long, we may want to turn it into a map to make lookup - # more efficient. 
- additional_language_codes: typing.Optional[typing.List[str]] = attr.ib(validator=attr.validators.optional(attr.validators.deep_iterable(member_validator=attr.validators.instance_of(str), - iterable_validator=attr.validators.instance_of(list))), - default=None) - - - @classmethod - def add_arguments(cls, parser: ArgumentParser): - parser.add_argument( "--additional-language-codes", dest="additional_language_codes", - help="Additional language codes.", nargs="*", default=None) - - parser.add_argument( "--allow-lax-strings", dest="allow_lax_strings", - help="Do not check if double quotes are backslashed inside strings.", action='store_true') - - parser.add_argument( "--allow-lax-lq-strings", dest="allow_lax_lq_strings", - help="Do not check if single quotes are backslashed inside language qualified strings.", action='store_true') - - parser.add_argument( "--allow-month-or-day-zero", dest="allow_month_or_day_zero", - help="Allow month or day zero in dates.", action='store_true') - - @classmethod - # Build the value parsing option structure. - def from_args(cls, args: Namespace)->'KgtkValueOptions': - return cls(allow_month_or_day_zero=args.allow_month_or_day_zero, - allow_lax_strings=args.allow_lax_strings, - allow_lax_lq_strings=args.allow_lax_lq_strings, - additional_language_codes=args.additional_language_codes) - -DEFAULT_KGTK_VALUE_OPTIONS: KgtkValueOptions = KgtkValueOptions() - @attr.s(slots=True, frozen=False) class KgtkValue(KgtkFormat): value: str = attr.ib(validator=attr.validators.instance_of(str)) @@ -418,19 +367,6 @@ def is_language_qualified_string(self, idx: typing.Optional[int] = None)->bool: def is_valid_language_qualified_string(self, idx: typing.Optional[int] = None)->bool: """Return False if this value is a list and idx is None. Otherwise, return True if the value looks like a language-qualified string. - - The language code may be a two- or three-character code from ISO - 639-3, which replaces ISO 639-1 and ISO 639-2. In addition, wikidata - may include language codes, such as 'mo', that have been retired. The - additional_language_codes table supports these codes, when allowed. - - Wikidata may also contain collective language codes, such as "nah", - referring the the Nahuatl languages. These codes from ISO 639-5 are - accepted as a fallback when ISO 639-3 lookup fails. - - https://meta.wikimedia.org/wiki/Special_language_codes - https://en.wikipedia.org/wiki/Template:ISO_639_name_be-tarask - """ if self.is_list() and idx is None: return False @@ -450,8 +386,7 @@ def is_valid_language_qualified_string(self, idx: typing.Optional[int] = None)-> lang: str = m.group("lang").lower() # print("lang: %s" % lang) - return LanguageValidator.validate(lang, - additional_language_codes=self.options.additional_language_codes) + return LanguageValidator.validate(lang, options=self.options) def is_location_coordinates(self, idx: typing.Optional[int] = None)->bool: """ diff --git a/kgtk/join/kgtkvalueoptions.py b/kgtk/join/kgtkvalueoptions.py new file mode 100644 index 000000000..e29ac6367 --- /dev/null +++ b/kgtk/join/kgtkvalueoptions.py @@ -0,0 +1,77 @@ +""" +KGTK value processing options. +""" + +from argparse import ArgumentParser, Namespace +import attr +import sys +import typing + +from kgtk.join.kgtkformat import KgtkFormat +from kgtk.join.languagevalidator import LanguageValidator + +@attr.s(slots=True, frozen=True) +class KgtkValueOptions: + """ + These options will affect some aspects of value processing. They are in a + seperate class for efficiency. 
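+    A single frozen instance can safely be shared by every KgtkValue that
+    a reader constructs.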
+ """ + + # Allow month 00 or day 00 in dates? This isn't really allowed by ISO + # 8601, but appears in wikidata. + allow_month_or_day_zero: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) + + # When allow_lax_strings is true, strings will be checked to see if they + # start and end with double quote ("), but we won't check if internal + # double quotes are excaped by backslash. + allow_lax_strings: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) + + # When allow_lax_lq_strings is true, language qualified strings will be + # checked to see if they start and end with single quote ('), but we won't + # check if internal single quotes are excaped by backslash. + allow_lax_lq_strings: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) + + # If this list gets long, we may want to turn it into a map to make lookup + # more efficient. + additional_language_codes: typing.Optional[typing.List[str]] = attr.ib(validator=attr.validators.optional(attr.validators.deep_iterable(member_validator=attr.validators.instance_of(str), + iterable_validator=attr.validators.instance_of(list))), + default=None) + + + @classmethod + def add_arguments(cls, parser: ArgumentParser): + parser.add_argument( "--additional-language-codes", dest="additional_language_codes", + help="Additional language codes.", nargs="*", default=None) + + parser.add_argument( "--allow-lax-strings", dest="allow_lax_strings", + help="Do not check if double quotes are backslashed inside strings.", action='store_true') + + parser.add_argument( "--allow-lax-lq-strings", dest="allow_lax_lq_strings", + help="Do not check if single quotes are backslashed inside language qualified strings.", action='store_true') + + parser.add_argument( "--allow-month-or-day-zero", dest="allow_month_or_day_zero", + help="Allow month or day zero in dates.", action='store_true') + + @classmethod + # Build the value parsing option structure. + def from_args(cls, args: Namespace)->'KgtkValueOptions': + return cls(allow_month_or_day_zero=args.allow_month_or_day_zero, + allow_lax_strings=args.allow_lax_strings, + allow_lax_lq_strings=args.allow_lax_lq_strings, + additional_language_codes=args.additional_language_codes) + +DEFAULT_KGTK_VALUE_OPTIONS: KgtkValueOptions = KgtkValueOptions() + +def main(): + """ + Test the KGTK value options. + """ + parser: ArgumentParser = ArgumentParser() + KgtkValueOptions.add_arguments(parser) + args: Namespace = parser.parse_args() + + # Build the value parsing option structure. + value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) + +if __name__ == "__main__": + main() diff --git a/kgtk/join/languagevalidator.py b/kgtk/join/languagevalidator.py index 771859d71..b22366a2c 100644 --- a/kgtk/join/languagevalidator.py +++ b/kgtk/join/languagevalidator.py @@ -6,10 +6,10 @@ import attr import iso639 # type: ignore import pycountry # type: ignore -import re -import sys import typing +from kgtkvalueoptions import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS + # Problem: pycountry incorporates the Debian team's ISO 639-3 table, # which as of 03-May-2020 has not been updated in four years! # Meanwhile, iso639 (from pypi iso-639) has an ISO 639-3 table @@ -26,6 +26,19 @@ # Solution: We will keep a list of additional language codes. @attr.s(slots=True, frozen=True) class LanguageValidator: + """ + The language code may be a two- or three-character code from ISO + 639-3, which replaces ISO 639-1 and ISO 639-2. 
In addition, wikidata + may include language codes, such as 'mo', that have been retired. The + additional_language_codes table supports these codes, when allowed. + + Wikidata may also contain collective language codes, such as "nah", + referring the the Nahuatl languages. These codes from ISO 639-5 are + accepted as a fallback when ISO 639-3 lookup fails. + + https://meta.wikimedia.org/wiki/Special_language_codes + https://en.wikipedia.org/wiki/Template:ISO_639_name_be-tarask + """ DEFAULT_ADDITIONAL_LANGUAGE_CODES: typing.List[str] = [ # New codes: @@ -33,14 +46,15 @@ class LanguageValidator: "hyw", # Wester Armenian. Added 23-Jan-2018. https://iso639-3.sil.org/code/hyw # Obsolete codes: - "mo", # Retired, replaced by the codes for Romanian, but still appearing in wikidata. + "mo", # Moldavian. Retired 3-Nov-2008. Replaced by the codes for Romanian. + # http://www.personal.psu.edu/ejp10/blogs/gotunicode/2008/11/language-tage-mo-for-moldovan.html "eml", # Emiliano-Romagnolo. Split and retired 16-Jan-2009. https://iso639-3.sil.org/code/eml ] @classmethod def validate(cls, lang: str, - additional_language_codes: typing.Optional[typing.List[str]]=None, + options: KgtkValueOptions=DEFAULT_KGTK_VALUE_OPTIONS, verbose: bool = False, )->bool: # Wikidata contains entries such as: @@ -82,13 +96,16 @@ def validate(cls, pass # If there's a table of additional language codes, check there: - if additional_language_codes is None: + additional_language_codes: typing.List[str] + if options.additional_language_codes is not None: + additional_language_codes = options.additional_language_codes if verbose: - print("Using the default list of additional language codes.") - additional_language_codes = LanguageValidator.DEFAULT_ADDITIONAL_LANGUAGE_CODES + print("Using a custom list of %d additional language codes." % len(additional_language_codes)) else: if verbose: - print("Using a custom list of %d additional language codes." % len(additional_language_codes)) + print("Using the default list of additional language codes.") + additional_language_codes = LanguageValidator.DEFAULT_ADDITIONAL_LANGUAGE_CODES + if lang in additional_language_codes: if verbose: print("found in the table of additional languages.") diff --git a/kgtk/join/nodereader.py b/kgtk/join/nodereader.py index 9b50fea02..0f83d8b8a 100644 --- a/kgtk/join/nodereader.py +++ b/kgtk/join/nodereader.py @@ -13,7 +13,7 @@ from kgtk.join.closableiter import ClosableIter from kgtk.join.enumnameaction import EnumNameAction from kgtk.join.kgtkreader import KgtkReader -from kgtk.join.kgtkvalue import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS +from kgtk.join.kgtkvalueoptions import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS from kgtk.join.validationaction import ValidationAction @attr.s(slots=True, frozen=False) From 0b76102e28ab1cd4b66421a7bb0590b8559ec279 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Sun, 3 May 2020 19:33:14 -0700 Subject: [PATCH 062/278] Compete refactoring the KgtkValueOptions and LanguageValidator. 
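
After this change, KgtkValueOptions lives in kgtk/join/kgtkvalueoptions.py and
LanguageValidator.validate() takes the whole options object instead of a bare
additional_language_codes list. A minimal sketch of the intended usage (the
command-line values below are illustrative, not part of this patch):

    from argparse import ArgumentParser, Namespace

    from kgtk.join.kgtkvalueoptions import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS
    from kgtk.join.languagevalidator import LanguageValidator

    parser: ArgumentParser = ArgumentParser()
    KgtkValueOptions.add_arguments(parser)
    args: Namespace = parser.parse_args(["--additional-language-codes", "mo", "eml"])

    # Build the value parsing option structure from the parsed arguments.
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # The validator consults the additional-codes table only after the
    # ISO 639-3 lookups fail.
    LanguageValidator.validate("mo", options=value_options)               # expected: True
    LanguageValidator.validate("en", options=DEFAULT_KGTK_VALUE_OPTIONS)  # expected: True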
--- kgtk/cli/validate.py | 2 +- kgtk/join/kgtkvalueoptions.py | 4 ---- kgtk/join/languagevalidator.py | 15 ++++++--------- 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/kgtk/cli/validate.py b/kgtk/cli/validate.py index db46d66ee..82fc6ab01 100644 --- a/kgtk/cli/validate.py +++ b/kgtk/cli/validate.py @@ -18,7 +18,7 @@ from kgtk.join.enumnameaction import EnumNameAction from kgtk.join.kgtkformat import KgtkFormat from kgtk.join.kgtkreader import KgtkReader -from kgtk.join.kgtkvalue import KgtkValueOptions +from kgtk.join.kgtkvalueoptions import KgtkValueOptions from kgtk.join.validationaction import ValidationAction def parser(): diff --git a/kgtk/join/kgtkvalueoptions.py b/kgtk/join/kgtkvalueoptions.py index e29ac6367..d7e0acb29 100644 --- a/kgtk/join/kgtkvalueoptions.py +++ b/kgtk/join/kgtkvalueoptions.py @@ -4,12 +4,8 @@ from argparse import ArgumentParser, Namespace import attr -import sys import typing -from kgtk.join.kgtkformat import KgtkFormat -from kgtk.join.languagevalidator import LanguageValidator - @attr.s(slots=True, frozen=True) class KgtkValueOptions: """ diff --git a/kgtk/join/languagevalidator.py b/kgtk/join/languagevalidator.py index b22366a2c..5e4eedb7a 100644 --- a/kgtk/join/languagevalidator.py +++ b/kgtk/join/languagevalidator.py @@ -8,7 +8,7 @@ import pycountry # type: ignore import typing -from kgtkvalueoptions import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS +from kgtk.join.kgtkvalueoptions import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS # Problem: pycountry incorporates the Debian team's ISO 639-3 table, # which as of 03-May-2020 has not been updated in four years! @@ -121,19 +121,16 @@ def main(): """ parser: ArgumentParser = ArgumentParser() parser.add_argument(dest="values", help="The values(s) to test", type=str, nargs="+") - - parser.add_argument( "--additional-language-codes", dest="additional_language_codes", - help="Additional language codes.", nargs="*", default=None) - parser.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true') + KgtkValueOptions.add_arguments(parser) args: Namespace = parser.parse_args() + # Build the value parsing option structure. + value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) + value: str for value in args.values: - result: bool = LanguageValidator.validate(value, - additional_language_codes=args.additional_language_codes, - verbose=args.verbose) - + result: bool = LanguageValidator.validate(value, options=value_options, verbose=args.verbose) print("%s: %s" % (value, str(result)), flush=True) if __name__ == "__main__": From 7fbe7407bca49b36d2cccf08ce3663bd8a5b4d35 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Sun, 3 May 2020 19:59:56 -0700 Subject: [PATCH 063/278] Provide a control over language suffix processing. Provide better arguments. 
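
Each lax-checking option now has an explicit allow/disallow flag pair in a
mutually exclusive argparse group, and a new allow_language_suffixes option
controls whether a suffix after a dash (as in "en-GB") is split off before the
ISO 639 lookup. A rough sketch of the expected behavior (the flag used below
is illustrative):

    from argparse import ArgumentParser, Namespace

    from kgtk.join.kgtkvalueoptions import KgtkValueOptions

    parser: ArgumentParser = ArgumentParser()
    KgtkValueOptions.add_arguments(parser)

    # Language suffix processing defaults to on; the new flag turns it off.
    args: Namespace = parser.parse_args(["--disallow-language-suffixes"])
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)
    print(value_options.allow_language_suffixes)  # False

With suffixes allowed, LanguageValidator splits "en-GB" into the base code
"en" and the suffix "GB" before validating; with them disallowed, the full
string "en-GB" is looked up as-is and would be expected to fail validation.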
--- kgtk/cli/validate.py | 2 ++ kgtk/join/kgtkvalueoptions.py | 43 +++++++++++++++++++++++++++++----- kgtk/join/languagevalidator.py | 2 +- 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/kgtk/cli/validate.py b/kgtk/cli/validate.py index 82fc6ab01..a823bcb5a 100644 --- a/kgtk/cli/validate.py +++ b/kgtk/cli/validate.py @@ -149,6 +149,7 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], header_error_action: ValidationAction = ValidationAction.EXIT, unsafe_column_name_action: ValidationAction = ValidationAction.REPORT, additional_language_codes: typing.Optional[typing.List[str]] = None, + allow_language_suffixes: bool = False, allow_lax_strings: bool = False, allow_lax_lq_strings: bool = False, allow_month_or_day_zero: bool = False, @@ -174,6 +175,7 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], value_options: KgtkValueOptions = KgtkValueOptions(allow_month_or_day_zero=allow_month_or_day_zero, allow_lax_strings=allow_lax_strings, allow_lax_lq_strings=allow_lax_lq_strings, + allow_language_suffixes=allow_language_suffixes, additional_language_codes=additional_language_codes) try: diff --git a/kgtk/join/kgtkvalueoptions.py b/kgtk/join/kgtkvalueoptions.py index d7e0acb29..cd07e5aa0 100644 --- a/kgtk/join/kgtkvalueoptions.py +++ b/kgtk/join/kgtkvalueoptions.py @@ -27,6 +27,8 @@ class KgtkValueOptions: # check if internal single quotes are excaped by backslash. allow_lax_lq_strings: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) + allow_language_suffixes: bool = attr.ib(validator=attr.validators.instance_of(bool), default=True) + # If this list gets long, we may want to turn it into a map to make lookup # more efficient. additional_language_codes: typing.Optional[typing.List[str]] = attr.ib(validator=attr.validators.optional(attr.validators.deep_iterable(member_validator=attr.validators.instance_of(str), @@ -39,19 +41,39 @@ def add_arguments(cls, parser: ArgumentParser): parser.add_argument( "--additional-language-codes", dest="additional_language_codes", help="Additional language codes.", nargs="*", default=None) - parser.add_argument( "--allow-lax-strings", dest="allow_lax_strings", - help="Do not check if double quotes are backslashed inside strings.", action='store_true') + lsgroup= parser.add_mutually_exclusive_group() + lsgroup.add_argument( "--allow-language-suffixes", dest="allow_language_suffixes", + help="Allow language identifier suffixes starting with a dash.", action='store_true', default=True) + + lsgroup.add_argument( "--disallow-language-suffixes", dest="allow_language_suffixes", + help="Disallow language identifier suffixes starting with a dash.", action='store_false') + + laxgroup= parser.add_mutually_exclusive_group() + laxgroup.add_argument( "--allow-lax-strings", dest="allow_lax_strings", + help="Do not check if double quotes are backslashed inside strings.", action='store_true', default=False) + + laxgroup.add_argument( "--disallow-lax-strings", dest="allow_lax_strings", + help="Check if double quotes are backslashed inside strings.", action='store_false') - parser.add_argument( "--allow-lax-lq-strings", dest="allow_lax_lq_strings", - help="Do not check if single quotes are backslashed inside language qualified strings.", action='store_true') + lqgroup= parser.add_mutually_exclusive_group() + lqgroup.add_argument( "--allow-lax-lq-strings", dest="allow_lax_lq_strings", + help="Do not check if single quotes are backslashed inside language qualified strings.", action='store_true', 
                          default=False)
+
+    lqgroup.add_argument( "--disallow-lax-lq-strings", dest="allow_lax_lq_strings",
+                          help="Check if single quotes are backslashed inside language qualified strings.", action='store_false')
+
+    md0group= parser.add_mutually_exclusive_group()
+    md0group.add_argument( "--allow-month-or-day-zero", dest="allow_month_or_day_zero",
+                           help="Allow month or day zero in dates.", action='store_true', default=False)
+
+    md0group.add_argument( "--disallow-month-or-day-zero", dest="allow_month_or_day_zero",
+                           help="Disallow month or day zero in dates.", action='store_false')
 
     @classmethod
     # Build the value parsing option structure.
     def from_args(cls, args: Namespace)->'KgtkValueOptions':
         return cls(allow_month_or_day_zero=args.allow_month_or_day_zero,
+                   allow_language_suffixes=args.allow_language_suffixes,
                    allow_lax_strings=args.allow_lax_strings,
                    allow_lax_lq_strings=args.allow_lax_lq_strings,
                    additional_language_codes=args.additional_language_codes)
@@ -69,5 +91,14 @@ def main():
     # Build the value parsing option structure.
     value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)
 
+    print("allow_month_or_day_zero: %s" % str(value_options.allow_month_or_day_zero))
+    print("allow_lax_strings: %s" % str(value_options.allow_lax_strings))
+    print("allow_lax_lq_strings: %s" % str(value_options.allow_lax_lq_strings))
+    print("allow_language_suffixes: %s" % str(value_options.allow_language_suffixes))
+    if value_options.additional_language_codes is None:
+        print("additional_language_codes: None")
+    else:
+        print("additional_language_codes: [ %s ]" % ", ".join(value_options.additional_language_codes))
+
 if __name__ == "__main__":
     main()
diff --git a/kgtk/join/languagevalidator.py b/kgtk/join/languagevalidator.py
index 5e4eedb7a..4c7c9ff2e 100644
--- a/kgtk/join/languagevalidator.py
+++ b/kgtk/join/languagevalidator.py
@@ -67,7 +67,7 @@ def validate(cls,
         save_lang: str = lang # for the debug prints below.
country_or_dialect: str = "" - if "-" in lang: + if options.allow_language_suffixes and "-" in lang: (lang, country_or_dialect) = lang.split("-", 1) if verbose: print("'%s' split into '%s' and '%s'" % (save_lang, lang, country_or_dialect)) From 40f6bb8045a87d8bb0941adf18c64d61e681e7c9 Mon Sep 17 00:00:00 2001 From: Naren Date: Sun, 3 May 2020 22:57:56 -0700 Subject: [PATCH 064/278] accept multiple languages --- kgtk/cli/import_wikidata.py | 60 ++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/kgtk/cli/import_wikidata.py b/kgtk/cli/import_wikidata.py index abf623746..0019fef32 100644 --- a/kgtk/cli/import_wikidata.py +++ b/kgtk/cli/import_wikidata.py @@ -129,7 +129,7 @@ def enter(self): self.cnt=0 self.write_mode='w' - def process(self,line,node_file,edge_file,qual_file,lang,doc_id): + def process(self,line,node_file,edge_file,qual_file,languages,doc_id): write_mode='a' if self.first==True: write_mode='w' @@ -140,7 +140,6 @@ def process(self,line,node_file,edge_file,qual_file,lang,doc_id): nrows=[] erows=[] qrows=[] - site_filter = '{}wiki'.format(lang) clean_line = line.strip() if clean_line.endswith(b","): clean_line = clean_line[:-1] @@ -157,13 +156,15 @@ def process(self,line,node_file,edge_file,qual_file,lang,doc_id): if self.parse_labels: labels = obj["labels"] if labels: - lang_label = labels.get(lang, None) - if lang_label: - lang_label['value']=lang_label['value'].replace('|','\\|') - row.append( - '\'' + lang_label['value'].replace("'","\\'") + '\'' + "@" + lang) - else: - row.append("") + label_list=[] + for lang in languages: + lang_label = labels.get(lang, None) + if lang_label: + lang_label['value']=lang_label['value'].replace('|','\\|') + label_list.append( + '\'' + lang_label['value'].replace("'","\\'") + '\'' + "@" + lang) + if len(label_list)>0: + row.append("|".join(label_list)) else: row.append("") row.append(entry_type) @@ -171,31 +172,33 @@ def process(self,line,node_file,edge_file,qual_file,lang,doc_id): if self.parse_descr: descriptions = obj["descriptions"] if descriptions: - lang_descr = descriptions.get(lang, None) - if lang_descr: - lang_descr['value']=lang_descr['value'].replace('|','\\|') - row.append( - '\'' + lang_descr['value'].replace("'","\\'") + '\'' + "@" + lang) - else: - row.append("") + descr_list=[] + for lang in languages: + lang_descr = descriptions.get(lang, None) + if lang_descr: + lang_descr['value']=lang_descr['value'].replace('|','\\|') + descr_list.append( + '\'' + lang_descr['value'].replace("'","\\'") + '\'' + "@" + lang) + if len(descr_list)>0: + row.append("|".join(descr_list)) else: row.append("") if self.parse_aliases: aliases = obj["aliases"] if aliases: - lang_aliases = aliases.get(lang, None) - if lang_aliases: - alias_list = [] - for item in lang_aliases: - item['value']=item['value'].replace('|','\\|') - alias_list.append( - '\'' + item['value'].replace("'","\\'") + '\'' + "@" + lang) - row.append("|".join(alias_list)) - else: - row.append('') + alias_list = [] + for lang in languages: + lang_aliases = aliases.get(lang, None) + if lang_aliases: + for item in lang_aliases: + item['value']=item['value'].replace('|','\\|') + alias_list.append( + '\'' + item['value'].replace("'","\\'") + '\'' + "@" + lang) + if len(alias_list)>0: + row.append("|".join(alias_list)) else: - row.append('') + row.append("") #row.append(doc_id) if node_file: @@ -428,6 +431,7 @@ def process(self,line,node_file,edge_file,qual_file,lang,doc_id): try: start=time.time() + languages=lang.split(',') if node_file: 
            header = ['id','label','type','description','alias']
            with open(node_file+'_header', 'w', newline='') as myfile:
@@ -466,7 +470,7 @@ def process(self,line,node_file,edge_file,qual_file,lang,doc_id):
             for cnt, line in enumerate(file):
                 if limit and cnt >= limit:
                     break
-                pp.add_task(line,node_file,edge_file,qual_file,lang,source)
+                pp.add_task(line,node_file,edge_file,qual_file,languages,source)
             pp.task_done()
             pp.join()
         if node_file:

From 664796e997724be38b70a2b1c4eac08a7da3729c Mon Sep 17 00:00:00 2001
From: saggu
Date: Mon, 4 May 2020 11:27:00 -0700
Subject: [PATCH 065/278] add ids, options to specify label for properties

---
 kgtk/cli/gt_loader.py | 54 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 46 insertions(+), 8 deletions(-)

diff --git a/kgtk/cli/gt_loader.py b/kgtk/cli/gt_loader.py
index c65514849..b78167725 100644
--- a/kgtk/cli/gt_loader.py
+++ b/kgtk/cli/gt_loader.py
@@ -29,9 +29,25 @@ def add_arguments(parser):
                         help='Graph tool file to dump the graph to - if empty, it will not be saved.')
     parser.add_argument('--output-stats', action='store_true', dest='output_stats',
                         help='do not output the graph but statistics only')
-
-
-def run(filename, directed, compute_degrees, compute_pagerank, compute_hits, log_file, output, output_stats):
+    parser.add_argument('--vertex-in-degree-property', action='store', dest='vertex_in_degree',
+                        default='vertex_in_degree',
+                        help='label for edge: vertex in degree property')
+    parser.add_argument('--vertex-out-degree-property', action='store', dest='vertex_out_degree',
+                        default='vertex_out_degree',
+                        help='label for edge: vertex out degree property')
+    parser.add_argument('--page-rank-property', action='store', dest='vertex_pagerank',
+                        default='vertex_pagerank',
+                        help='label for page rank property')
+    parser.add_argument('--vertex-hits-authority-property', action='store', dest='vertex_auth',
+                        default='vertex_auth',
+                        help='label for edge: vertex hits authority')
+    parser.add_argument('--vertex-hits-hubs-property', action='store', dest='vertex_hubs',
+                        default='vertex_hubs',
+                        help='label for edge: vertex hits hubs')
+
+
+def run(filename, directed, compute_degrees, compute_pagerank, compute_hits, log_file, output, output_stats,
+        vertex_in_degree, vertex_out_degree, vertex_pagerank, vertex_auth, vertex_hubs):
     from kgtk.exceptions import KGTKException
     def infer_index(h, options=[]):
         for o in options:
@@ -45,6 +61,11 @@ def infer_predicate(h, options=[]):
                 return o
         return ''
 
+    v_prop_dict = {
+        'vertex_pagerank': vertex_pagerank,
+        'vertex_hubs': vertex_hubs,
+        'vertex_auth': vertex_auth
+    }
     try:
         # import modules locally
         import socket
@@ -115,21 +136,38 @@ def infer_predicate(h, options=[]):
                 for n_id, n_label, authority in main_auth:
                     writer.write('%s\t%s\t%f\n' % (n_id, n_label, authority))
 
-        sys.stdout.write('node1\tproperty\tnode2\n')
+        sys.stdout.write('id\tnode1\tproperty\tnode2\n')
+        id_count = 0
         if not output_stats:
             for e in G2.edges():
                 sid, oid = e
                 lbl = G2.ep[predicate][e]
-                sys.stdout.write('%s\t%s\t%s\n' % (G2.vp[id_col][sid], lbl, G2.vp[id_col][oid]))
+                sys.stdout.write(
+                    '%s\t%s\t%s\t%s\n' % (
+                        '{}-{}-{}'.format(G2.vp[id_col][sid], lbl, id_count), G2.vp[id_col][sid], lbl,
+                        G2.vp[id_col][oid]))
+                id_count += 1
 
+        id_count = 0
         for v in G2.vertices():
             v_id = G2.vp[id_col][v]
-            sys.stdout.write('{}\t{}\t{}\n'.format(v_id, 'vertex_in_degree', v.in_degree()))
-            sys.stdout.write('{}\t{}\t{}\n'.format(v_id, 'vertex_out_degree', v.out_degree()))
+            sys.stdout.write(
+                '{}\t{}\t{}\t{}\n'.format('{}-{}-{}'.format(v_id, vertex_in_degree, id_count), v_id,
+                                          vertex_in_degree, v.in_degree()))
+            id_count += 1
+            sys.stdout.write(
+                '{}\t{}\t{}\t{}\n'.format('{}-{}-{}'.format(v_id, vertex_out_degree, id_count), v_id,
+                                          vertex_out_degree, v.out_degree()))
+            id_count += 1
+
             for vprop in G2.vertex_properties.keys():
                 if vprop == id_col:
                     continue
-                sys.stdout.write('%s\t%s\t%s\n' % (v_id, vprop, G2.vp[vprop][v]))
+                sys.stdout.write(
+                    '%s\t%s\t%s\t%s\n' % (
+                        '{}-{}-{}'.format(v_id, v_prop_dict[vprop], id_count), v_id, v_prop_dict[vprop],
+                        G2.vp[vprop][v]))
+                id_count += 1
 
         if output:
             writer.write('now saving the graph to %s\n' % output)

From c46b1d2cbf23e7b7e514a92a33d9ffcd00195a67 Mon Sep 17 00:00:00 2001
From: Craig Milo Rogers
Date: Mon, 4 May 2020 12:41:20 -0700
Subject: [PATCH 066/278] Documentation cleanup. Replace erroneous backslash
 with vertical bar in si_unit_pat.

---
 kgtk/join/kgtkvalue.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py
index 677bcdd51..2d95bd4aa 100644
--- a/kgtk/join/kgtkvalue.py
+++ b/kgtk/join/kgtkvalue.py
@@ -178,7 +178,7 @@ def is_number_or_quantity(self, idx: typing.Optional[int] = None)->bool:
         # http://www.csun.edu/~vceed002/ref/measurement/units/units.pdf
         #
         # Note: if Q were in this list, it would conflict with Wikidata nodes (below).
-        si_unit_pat: str = r'(?:m|kg|s|C|K|mol|cd|F|M|A|N|ohms|V|J|Hz|lx|H|Wb|V\W|Pa)'
+        si_unit_pat: str = r'(?:m|kg|s|C|K|mol|cd|F|M|A|N|ohms|V|J|Hz|lx|H|Wb|V|W|Pa)'
         si_power_pat: str = r'(?:-1|2|3)' # Might need more.
         si_combiner_pat: str = r'[./]'
         si_pat: str = r'(?:{si_unit}{si_power}?(?:{si_combiner}{si_unit}{si_power}?)*)'.format(si_unit=si_unit_pat,
@@ -407,9 +407,6 @@ def is_valid_location_coordinates(self, idx: typing.Optional[int] = None)->bool:
         Return False if this value is a list and idx is None.
         Otherwise, return True if the value looks like valid location coordinates.
-
-        Note: The coordinates must look exactly like the examples in KGTK
-        File Format v2, excelt for optional +/- characters.
- @043.26193/010.92708 """ if self.is_list() and idx is None: From 2ee03e46ad80ff6cbb64c8011d7a9296fa09f668 Mon Sep 17 00:00:00 2001 From: ckxz105 Date: Mon, 4 May 2020 17:37:31 -0700 Subject: [PATCH 067/278] move main embedding codes to gt/embedding_utils.py, add support for kgtk format on multiprocessing --- kgtk/cli/text_embedding.py | 682 +++---------------------------------- kgtk/gt/embedding_utils.py | 676 ++++++++++++++++++++++++++++++++++++ 2 files changed, 716 insertions(+), 642 deletions(-) create mode 100644 kgtk/gt/embedding_utils.py diff --git a/kgtk/cli/text_embedding.py b/kgtk/cli/text_embedding.py index 35f8e8c35..6083da388 100644 --- a/kgtk/cli/text_embedding.py +++ b/kgtk/cli/text_embedding.py @@ -21,621 +21,6 @@ ] -class EmbeddingVector: - def __init__(self, model_name=None, query_server=None, cache_config: dict = {}): - from sentence_transformers import SentenceTransformer, SentencesDataset, LoggingHandler, losses, models # type: ignore - import logging - import re - self._logger = logging.getLogger(__name__) - from collections import defaultdict - if model_name is None: - self.model_name = 'bert-base-nli-mean-tokens' - # xlnet need to be trained before using, we can't use this for now - # elif model_name == "xlnet-base-cased": - # word_embedding_model = models.XLNet('xlnet-base-cased') - # # Apply mean pooling to get one fixed sized sentence vector - # pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(), - # pooling_mode_mean_tokens=True, - # pooling_mode_cls_token=False, - # pooling_mode_max_tokens=False) - # self.model = SentenceTransformer(modules=[word_embedding_model, pooling_model]) - else: - self.model_name = model_name - self._logger.info("Using model {}".format(self.model_name)) - self.model = SentenceTransformer(self.model_name) - # setup redis cache server - if query_server is None or query_server == "": - self.wikidata_server = "https://query.wikidata.org/sparql" - else: - self.wikidata_server = query_server - use_cache = cache_config.get("use_cache", False) - if use_cache: - import redis - host = cache_config.get("host", "dsbox01.isi.edu") - port = cache_config.get("port", 6379) - self.redis_server = redis.Redis(host=host, port=port, db=0) - try: - _ = self.redis_server.get("foo") - self._logger.debug("Cache server {}:{} connected!".format(host, port)) - except: - self._logger.error("Cache server {}:{} is not able to be connected! 
Will not use cache!".format(host, port)) - self.redis_server = None - else: - self.redis_server = None - self.qnodes_descriptions = dict() - self.vectors_map = dict() - self.property_labels_dict = dict() - self.q_node_to_label = dict() - self.node_labels = dict() - self.vectors_2D = None - self.vector_dump_file = None - self.gt_nodes = set() - self.candidates = defaultdict(dict) - self.metadata = [] - self.gt_indexes = set() - self.input_format = "" - self.token_pattern = re.compile(r"(?u)\b\w\w+\b") - - def get_sentences_embedding(self, sentences: typing.List[str], qnodes: typing.List[str]): - """ - transform a list of sentences to embedding vectors - """ - from ast import literal_eval - if self.redis_server is not None: - sentence_embeddings = [] - for each_node, each_sentence in zip(qnodes, sentences): - query_cache_key = each_node + each_sentence - if self.model_name != "bert-base-wikipedia-sections-mean-tokens": - query_cache_key += self.model_name - cache_res = self.redis_server.get(query_cache_key) - if cache_res is not None: - sentence_embeddings.append(literal_eval(cache_res.decode("utf-8"))) - # self._logger.error("{} hit!".format(each_node+each_sentence)) - else: - each_embedding = self.model.encode([each_sentence], show_progress_bar=False) - sentence_embeddings.extend(each_embedding) - self.redis_server.set(query_cache_key, str(each_embedding[0].tolist())) - else: - sentence_embeddings = self.model.encode(sentences, show_progress_bar=False) - return sentence_embeddings - - def send_sparql_query(self, query_body: str): - """ - a simple wrap to send the query and return the returned results - """ - from SPARQLWrapper import SPARQLWrapper, JSON, POST, URLENCODED # type: ignore - qm = SPARQLWrapper(self.wikidata_server) - qm.setReturnFormat(JSON) - qm.setMethod(POST) - qm.setRequestMethod(URLENCODED) - self._logger.debug("Sent query is:") - self._logger.debug(str(query_body)) - qm.setQuery(query_body) - try: - results = qm.query().convert()['results']['bindings'] - return results - except: - raise KGTKException("Sending Sparql query to {} failed!".format(self.wikidata_server)) - - def _get_labels(self, nodes: typing.List[str]): - query_nodes = " ".join(["wd:{}".format(each) for each in nodes]) - query = """ - select ?item ?nodeLabel - where { - values ?item {""" + query_nodes + """} - ?item rdfs:label ?nodeLabel. - FILTER(LANG(?nodeLabel) = "en"). - } - """ - results2 = self.send_sparql_query(query) - for each_res in results2: - node_id = each_res['item']['value'].split("/")[-1] - value = each_res['nodeLabel']['value'] - self.node_labels[node_id] = value - - def _get_labels_and_descriptions(self, query_qnodes: str, need_find_label: bool, need_find_description: bool): - query_body = """ - select ?item ?itemDescription ?itemLabel - where { - values ?item {""" + query_qnodes + """ } - SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". 
} - } - """ - results = self.send_sparql_query(query_body) - for each in results: - each_node = each['item']['value'].split("/")[-1] - if 'itemDescription' in each: - description = each['itemDescription']['value'] - else: - description = "" - if "itemLabel" in each: - label = each['itemLabel']['value'] - else: - label = "" - if need_find_label: - self.candidates[each_node]["label_properties"] = [label] - if need_find_description: - self.candidates[each_node]["description_properties"] = [description] - - def _get_property_values(self, query_qnodes, query_part_names, query_part_properties): - used_p_node_ids = set() - for part_name, part in zip(query_part_names, query_part_properties): - if part_name == "isa_properties": - self._get_labels(part) - for i, each in enumerate(part): - if each not in {"label", "description", "all"}: - query_body2 = """ - select ?item ?eachPropertyLabel - where {{ - values ?item {{{all_nodes}}} - ?item wdt:{qnode} ?eachProperty. - SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }} - }} - """.format(all_nodes=query_qnodes, qnode=each) - results2 = self.send_sparql_query(query_body2) - - for each_res in results2: - node_id = each_res['item']['value'].split("/")[-1] - value = each_res['eachPropertyLabel']['value'] - if part_name == "isa_properties" and self.node_labels[each].endswith("of"): - value = self.node_labels[each] + "||" + value - used_p_node_ids.add(node_id) - if part_name in self.candidates[node_id]: - self.candidates[node_id][part_name] = value - else: - self.candidates[node_id][part_name] = {value} - return used_p_node_ids - - def _get_all_properties(self, query_qnodes, used_p_node_ids, properties_list): - has_properties_set = set(properties_list[3]) - query_body3 = """ - select DISTINCT ?item ?p_entity ?p_entityLabel - where { - values ?item {""" + query_qnodes + """} - ?item ?p ?o. - FILTER regex(str(?p), "^http://www.wikidata.org/prop/P", "i") - BIND (IRI(REPLACE(STR(?p), "http://www.wikidata.org/prop", "http://www.wikidata.org/entity")) AS ?p_entity) . - SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". 
} - } - """ - results3 = self.send_sparql_query(query_body3) - for each in results3: - node_name = each['item']['value'].split("/")[-1] - p_node_id = each['p_entity']['value'].split("/")[-1] - p_node_label = each['p_entityLabel']['value'] - if p_node_id not in used_p_node_ids: - if properties_list[3] == ["all"] or p_node_id in has_properties_set: - if "has_properties" in self.candidates[node_name]: - self.candidates[node_name]["has_properties"].add(p_node_label) - else: - self.candidates[node_name]["has_properties"] = {p_node_label} - - def get_item_description(self, qnodes: typing.List[str] = None, target_properties: dict = {}): - """ - use sparql query to get the descriptions of given Q nodes - """ - if qnodes is None: - qnodes = self.candidates - if "all" in target_properties: - find_all_properties = True - else: - find_all_properties = False - properties_list = [[] for _ in range(4)] - names = ["labels", "descriptions", "isa_properties", "has_properties"] - for k, v in target_properties.items(): - if v == "label_properties": - properties_list[0].append(k) - elif v == "description_properties": - properties_list[1].append(k) - elif v == "isa_properties": - properties_list[2].append(k) - elif v == "has_properties": - properties_list[3].append(k) - - sentences_cache_dict = {} - if self.redis_server is not None: - for each_node in qnodes: - cache_res = self.redis_server.get(each_node + str(properties_list)) - if cache_res is not None: - sentences_cache_dict[each_node] = cache_res.decode("utf-8") - - if len(sentences_cache_dict) > 0: - qnodes = set(qnodes) - set(sentences_cache_dict.keys()) - - # only need to do query when we still have remained nodes - if len(qnodes) > 0: - need_find_label = "label" in properties_list[0] - need_find_description = "description" in properties_list[1] - query_qnodes = "" - for each in qnodes: - query_qnodes += "wd:{} ".format(each) - - # this is used to get corresponding labels / descriptions - if need_find_label or need_find_description: - self._get_labels_and_descriptions(query_qnodes, need_find_label, need_find_description) - - if len(properties_list[3]) > len(qnodes): - # in this condition, we have too many properties need to be queried, it will waste time - # query to get all properties then filtering would save more times - find_all_properties = True - query_part2_names = names[:3] - query_part2_properties = properties_list[:3] - else: - query_part2_names = names - query_part2_properties = properties_list - # this is used to get corresponding labels of properties values - used_p_node_ids = self._get_property_values(query_qnodes, query_part2_names, query_part2_properties) - - # if need get all properties, we need to run extra query - if find_all_properties: - self._get_all_properties(query_qnodes, used_p_node_ids, properties_list) - - for each_node_id in qnodes: - each_sentence = self.attribute_to_sentence(self.candidates[each_node_id], each_node_id) - self.candidates[each_node_id]["sentence"] = each_sentence - if self.redis_server is not None: - self.redis_server.set(each_node_id + str(properties_list), each_sentence) - - for each_node_id, sentence in sentences_cache_dict.items(): - self.candidates[each_node_id]["sentence"] = sentence - - def read_input(self, file_path: str, skip_nodes_set: set = None, - input_format: str = "kgtk_format", target_properties: dict = {}, - property_labels_dict: dict = {}, black_list_set: set = set() - ): - """ - load the input candidates files - """ - from collections import defaultdict - import pandas as pd # type: ignore 
- import numpy as np - import math - - self.property_labels_dict = property_labels_dict - - if input_format == "test_format": - self.input_format = input_format - input_df = pd.read_csv(file_path) - gt = {} - count = 0 - if "GT_kg_id" in input_df.columns: - gt_column_id = "GT_kg_id" - elif "kg_id" in input_df.columns: - gt_column_id = "kg_id" - else: - raise KGTKException("Can't find ground truth id column! It should either named as `GT_kg_id` or `kg_id`") - - for _, each in input_df.iterrows(): - if isinstance(each["candidates"], str): - temp = str(each['candidates']).split("|") - elif each['candidates'] is np.nan or math.isnan(each['candidates']): - temp = [] - - to_remove_q = set() - if each[gt_column_id] is np.nan: - self._logger.warning("Ignore NaN gt value form {}".format(str(each))) - each[gt_column_id] = "" - gt_nodes = each[gt_column_id].split(" ") - label = str(each["label"]) - if len(gt_nodes) == 0: - self._logger.error("Skip a row with no ground truth node given: as {}".format(str(each))) - continue - if label == "": - self._logger.error("Skip a row with no label given: as {}".format(str(each))) - continue - temp.extend(gt_nodes) - - for each_q in temp: - self.q_node_to_label[each_q] = label - if skip_nodes_set is not None and each_q in skip_nodes_set: - to_remove_q.add(each_q) - temp = set(temp) - to_remove_q - count += len(temp) - self.gt_nodes.add(each[gt_column_id]) - self.get_item_description(temp, target_properties) - - self._logger.info("Totally {} rows with {} candidates loaded.".format(str(len(gt)), str(count))) - - elif input_format == "kgtk_format": - # assume the input edge file is sorted - if "all" in target_properties: - _ = target_properties.pop("all") - add_all_properties = True - else: - add_all_properties = False - - self.input_format = input_format - with open(file_path, "r") as f: - # get header - headers = f.readline().replace("\n", "").split("\t") - if len(headers) < 3: - raise KGTKException( - "No enough columns found on given input file. 
Only {} columns given but at least 3 needed.".format( - len(headers))) - elif "node" in headers and "property" in headers and "value" in headers: - column_references = {"node": headers.index("node"), - "property": headers.index("property"), - "value": headers.index("value")} - elif len(headers) == 3: - column_references = {"node": 0, - "property": 1, - "value": 2} - else: - missing_column = {"node", "property", "value"} - set(headers) - raise KGTKException("Missing column {}".format(missing_column)) - self._logger.debug("column index information: ") - self._logger.debug(str(column_references)) - # read contents - each_node_attributes = {"has_properties": [], "isa_properties": [], "label_properties": [], - "description_properties": []} - current_process_node_id = None - for each_line in f: - each_line = each_line.replace("\n", "").split("\t") - node_id = each_line[column_references["node"]] - node_property = each_line[column_references["property"]] - node_value = each_line[column_references["value"]] - # remove @ mark - if "@" in node_value and node_value[0] != "@": - node_value_org = node_value - node_value = node_value[:node_value.index("@")] - - # remove extra double quote " and single quote ' - if node_value[0] == '"' and node_value[-1] == '"': - node_value = node_value[1:-1] - if node_value[0] == "'" and node_value[-1] == "'": - node_value = node_value[1:-1] - - if current_process_node_id != node_id: - if current_process_node_id is None: - current_process_node_id = node_id - else: - # if we get to next id - # concate all properties into one sentence to represent the Q node - concated_sentence = self.attribute_to_sentence(each_node_attributes, current_process_node_id) - each_node_attributes["sentence"] = concated_sentence - self.candidates[current_process_node_id] = each_node_attributes - # after write down finish, we can cleaer and start parsing next one - each_node_attributes = {"has_properties": [], "isa_properties": [], "label_properties": [], - "description_properties": []} - # update to new id - current_process_node_id = node_id - - if node_property in target_properties: - each_node_attributes[target_properties[node_property]].append(node_value) - if add_all_properties and each_line[column_references["value"]][0] == "P": - each_node_attributes["has_properties"].append(node_value) - - else: - raise KGTKException("Unkonwn input format {}".format(input_format)) - - self._logger.info("Totally {} Q nodes loaded.".format(len(self.candidates))) - self.vector_dump_file = "dump_vectors_{}_{}.pkl".format(file_path[:file_path.rfind(".")], self.model_name) - # self._logger.debug("The cache file name will be {}".format(self.vector_dump_file)) - - def get_real_label_name(self, node): - if node in self.property_labels_dict: - return self.property_labels_dict[node] - else: - return node - - def attribute_to_sentence(self, v, node_id=None): - concated_sentence = "" - have_isa_properties = False - # sort the properties to ensure the sentence always same - v = {key: sorted(list(value)) for key, value in v.items() if len(value) > 0} - if "label_properties" in v and len(v["label_properties"]) > 0: - concated_sentence += self.get_real_label_name(v["label_properties"][0]) - if "description_properties" in v and len(v["description_properties"]) > 0: - if concated_sentence != "" and v["description_properties"][0] != "": - concated_sentence += ", " - concated_sentence += self.get_real_label_name(v["description_properties"][0]) - if "isa_properties" in v and len(v["isa_properties"]) > 0: - 
have_isa_properties = True - temp = "" - for each in v["isa_properties"]: - each = self.get_real_label_name(each) - if "||" in each: - if "instance of" in each: - each = each.split("||")[1] - else: - each = each.replace("||", " ") - temp += each + ", " - if concated_sentence != "" and temp != "": - concated_sentence += " is a " - elif concated_sentence == "": - concated_sentence += "It is a " - concated_sentence += temp[:-2] - if "has_properties" in v and len(v["has_properties"]) > 0: - temp = [self.get_real_label_name(each) for each in v["has_properties"]] - if concated_sentence != "" and temp[0] != "": - if have_isa_properties: - concated_sentence += ", and has " - else: - concated_sentence += " has " - elif temp[0] != "": - concated_sentence += "It has " - concated_sentence += " and ".join(temp) - self._logger.debug("Transform node {} --> {}".format(node_id, concated_sentence)) - return concated_sentence - - def get_vetors(self): - """ - main function to get the vector representations of the descriptions - """ - import os - import time - from tqdm import tqdm # type: ignore - - start_all = time.time() - self._logger.info("Now generating embedding vector.") - for q_node, each_item in tqdm(self.candidates.items()): - # do process for each row(one target) - sentence = each_item["sentence"] - if isinstance(sentence, bytes): - sentence = sentence.decode("utf-8") - vectors = self.get_sentences_embedding([sentence], [q_node]) - self.vectors_map[q_node] = vectors[0] - self._logger.info("Totally used {} seconds.".format(str(time.time() - start_all))) - - def dump_vectors(self, file_name, type_=None): - import pickle - if file_name.endswith(".pkl"): - file_name = file_name.replace(".pkl", "") - if type_ == "2D": - with open(file_name + ".pkl", "wb") as f: - pickle.dump(self.vectors_2D, f) - dimension = len(self.vectors_2D[0]) - with open(file_name + ".tsv", "w") as f: - for each in self.vectors_2D: - for i, each_val in enumerate(each): - _ = f.write(str(each_val)) - if i != dimension - 1: - _ = f.write("\t") - _ = f.write("\n") - elif type_ == "metadata": - with open(file_name + "_metadata.tsv", "w") as f: - for each in self.metadata: - _ = f.write(each + "\n") - else: - with open(file_name + ".pkl", "wb") as f: - pickle.dump(self.vectors_map, f) - with open(file_name + ".tsv", "w") as f: - for each in self.vectors_map.values(): - for i in each: - _ = f.write(str(i) + "\t") - _ = f.write("\n") - - def print_vector(self, vectors, output_properties: str = "text_embedding", output_format="kgtk_format"): - if output_format == "kgtk_format": - print("node\tproperty\tvalue\n", end="") - if self.input_format == "kgtk_format": - for i, each_vector in enumerate(vectors): - print(str(list(self.candidates.keys())[i]) + "\t", end="") - print(output_properties + "\t", end="") - for j, each_dimension in enumerate(each_vector): - if j != len(each_vector) - 1: - print(str(each_dimension) + ",", end="") - else: - print(str(each_dimension) + "\n", end="") - elif self.input_format == "test_format": - all_nodes = list(self.vectors_map.keys()) - for i, each_vector in enumerate(vectors): - print(all_nodes[i] + "\t", end="") - print(output_properties + "\t", end="") - for j, each_dimension in enumerate(each_vector): - if j != len(each_vector) - 1: - print(str(each_dimension) + ",", end="") - else: - print(str(each_dimension) + "\n", end="") - - elif output_format == "tsv_format": - for each_vector in vectors: - for i, each_dimension in enumerate(each_vector): - if i != len(each_vector) - 1: - print(str(each_dimension) 
+ "\t", end="") - else: - print(str(each_dimension) + "\n", end="") - - def plot_result(self, output_properties={}, input_format="kgtk_format", - output_uri: str = "", output_format="kgtk_format", - run_TSNE=True - ): - """ - transfer the vectors to lower dimension so that we can plot - Then save the 2D vector file for further purpose - """ - import os - import time - from sklearn.manifold import TSNE # type: ignore - - self.vectors_map = {k: v for k, v in sorted(self.vectors_map.items(), key=lambda item: item[0], reverse=True)} - vectors = list(self.vectors_map.values()) - # use tsne to reduce dimension - if run_TSNE: - self._logger.warning("Start running TSNE to reduce dimension. It will take a long time.") - start = time.time() - self.vectors_2D = TSNE(n_components=2, random_state=0).fit_transform(vectors) - self._logger.info("Totally used {} seconds.".format(time.time() - start)) - - if input_format == "test_format": - gt_indexes = set() - vector_map_keys = list(self.vectors_map.keys()) - for each_node in self.gt_nodes: - gt_indexes.add(vector_map_keys.index(each_node)) - - self.metadata.append("Q_nodes\tType\tLabel\tDescription") - for i, each in enumerate(self.vectors_map.keys()): - label = self.q_node_to_label[each] - description = self.candidates[each]["sentence"] - if i in gt_indexes: - self.metadata.append("{}\tground_truth_node\t{}\t{}".format(each, label, description)) - else: - self.metadata.append("{}\tcandidates\t{}\t{}".format(each, label, description)) - self.gt_indexes = gt_indexes - - elif input_format == "kgtk_format": - if len(output_properties.get("metatada_properties", [])) == 0: - for k, v in self.candidates.items(): - label = v.get("label_properties", "") - if len(label) > 0 and isinstance(label, list): - label = label[0] - description = v.get("description_properties", "") - if len(description) > 0 and isinstance(description, list): - description = description[0] - self.metadata.append("{}\t\t{}\t{}".format(k, label, description)) - else: - required_properties = output_properties["metatada_properties"] - self.metadata.append("node\t" + "\t".join(required_properties)) - for k, v in self.candidates.items(): - each_metadata = k + "\t" - for each in required_properties: - each_metadata += v.get(each, " ") + "\t" - self.metadata.append(each_metadata) - - metadata_output_path = os.path.join(output_uri, self.vector_dump_file.split("/")[-1]) - if run_TSNE: - self.print_vector(self.vectors_2D, output_properties.get("output_properties"), output_format) - else: - self.print_vector(vectors, output_properties.get("output_properties"), output_format) - if output_uri != "none": - self.dump_vectors(metadata_output_path, "metadata") - - def evaluate_result(self): - """ - for the ground truth nodes, evaluate the average distance to the centroid, the lower the average distance, the better clustering results should be - """ - import numpy as np - centroid = None - gt_nodes_vectors = [] - if len(self.gt_indexes) == 0: - points = set(range(len(self.vectors_map))) - else: - points = self.gt_indexes - for i, each in enumerate(self.vectors_map.keys()): - if i in points: - if centroid is None: - centroid = np.array(self.vectors_map[each]) - else: - centroid += np.array(self.vectors_map[each]) - gt_nodes_vectors.append(self.vectors_map[each]) - centroid = centroid / len(points) - - distance_sum = 0 - for each in gt_nodes_vectors: - distance_sum += self.calculate_distance(each, centroid) - self._logger.info("The average distance for the ground truth nodes to centroid is 
{}".format(distance_sum / len(points))) - - @staticmethod - def calculate_distance(a, b): - if len(a) != len(b): - raise KGTKException("Vector dimension are different!") - dist = 0 - for v1, v2 in zip(a, b): - dist += (v1 - v2) ** 2 - dist = dist ** 0.5 - return dist - - def load_property_labels_file(input_files: typing.List[str]): labels_dict = {} headers = None @@ -718,11 +103,25 @@ def load_black_list_files(file_path): def main(**kwargs): from kgtk.exceptions import KGTKException + import logging + import os + from time import strftime + do_logging = kwargs.get("_debug", False) + if do_logging: + logging_level_class = logging.DEBUG + logger_path = os.path.join(os.environ.get("HOME"), + "kgtk_text_embedding_log_{}.log".format(strftime("%Y-%m-%d-%H-%M"))) + logging.basicConfig(level=logging_level_class, + format="%(asctime)s [%(levelname)s] %(name)s %(lineno)d -- %(message)s", + datefmt='%m-%d %H:%M:%S', + filename=logger_path, + filemode='w') + + _logger = logging.getLogger(__name__) + _logger.warning("Running with logging level {}".format(_logger.getEffectiveLevel())) + try: - import logging - import os import time - from time import strftime import torch import typing import pandas as pd @@ -731,29 +130,17 @@ def main(**kwargs): import re import argparse import pickle - - do_logging = kwargs.get("_debug", False) - if do_logging: - logging_level_class = logging.DEBUG - logger_path = os.path.join(os.environ.get("HOME"), - "kgtk_text_embedding_log_{}.log".format(strftime("%Y-%m-%d-%H-%M"))) - logging.basicConfig(level=logging_level_class, - format="%(asctime)s [%(levelname)s] %(name)s %(lineno)d -- %(message)s", - datefmt='%m-%d %H:%M:%S', - filename=logger_path, - filemode='w') - - _logger = logging.getLogger(__name__) - _logger.warning("Running with logging level {}".format(_logger.getEffectiveLevel())) + from kgtk.gt.embedding_utils import EmbeddingVector # get input parameters from kwargs output_uri = kwargs.get("output_uri", "") + parallel_count = kwargs.get("parallel_count", "1") black_list_files = kwargs.get("black_list_files", "") all_models_names = kwargs.get("all_models_names", ['bert-base-wikipedia-sections-mean-tokens']) input_format = kwargs.get("input_format", "kgtk_format") input_uris = kwargs.get("input_uris", []) output_format = kwargs.get("output_format", "kgtk_format") - property_labels_files = kwargs.get("property_labels_file_uri", "") + property_labels_files = kwargs.get("property_labels_file_uri", []) query_server = kwargs.get("query_server") properties = dict() all_property_relate_inputs = [kwargs.get("label_properties", ["label"]), @@ -802,11 +189,12 @@ def main(**kwargs): for each_model_name in all_models_names: for each_input_file in input_uris: _logger.info("Running {} model on {}".format(each_model_name, each_input_file)) - process = EmbeddingVector(each_model_name, query_server=query_server, cache_config=cache_config) + process = EmbeddingVector(each_model_name, query_server=query_server, cache_config=cache_config, + parallel_count=parallel_count) process.read_input(file_path=each_input_file, skip_nodes_set=black_list_set, input_format=input_format, target_properties=properties, property_labels_dict=property_labels_dict) - process.get_vetors() + process.get_vectors() process.plot_result(output_properties=output_properties, input_format=input_format, output_uri=output_uri, run_TSNE=run_TSNE, output_format=output_format) @@ -866,23 +254,28 @@ def str2bool(v): This argument is only valid for input in kgtk format.""") parser.add_argument('--isa-properties', 
                        action='store', nargs='+', dest='isa_properties', default=["P31"],
-                        help="""The names of the eges for `isa` properties, Default is ["P31"] (the `instance of` node in wikidata).\n
-                        This argument is only valid for input in kgtk format.""")
+                        help="""The names of the edges for `isa` properties, Default is ["P31"] (the `instance of` node in
+                        wikidata).\n This argument is only valid for input in kgtk format.""")
     parser.add_argument('--has-properties', action='store', nargs='+', dest='has_properties', default=["all"],
-                        help="""The names of the eges for `has` properties, Default is ["all"] (will automatically append all properties found for each node).\n This argument is only valid for input in kgtk format.""")
+                        help="""The names of the edges for `has` properties, Default is ["all"] (will automatically append all
+                        properties found for each node).\n This argument is only valid for input in kgtk format.""")
     parser.add_argument('--output-property', action='store', dest='output_properties', default="text_embedding",
-                        help="""The output property name used to record the embedding. Default is `output_properties`. \nThis argument is only valid for output in kgtk format.""")
+                        help="""The output property name used to record the embedding. Default is `text_embedding`. \nThis
+                        argument is only valid for output in kgtk format.""")
     # output
     parser.add_argument('-o', '--embedding-projector-metadata-path', action='store', dest='output_uri', default="",
                         help="output path for the metadata file, default will be current user's home directory")
     parser.add_argument('--output-format', action='store', dest='output_format', default="kgtk",
                         choices=("tsv_format", "kgtk_format"),
-                        help="output format, can either be `tsv_format` or `kgtk_format`. \nIf choose `tsv_format`, the output will be a tsv file, with each row contains only the vector representation of a node. Each dimension is separated by a tab")
+                        help="output format, can either be `tsv_format` or `kgtk_format`. \nIf choose `tsv_format`, the output "
+                             "will be a tsv file, with each row contains only the vector representation of a node. Each "
+                             "dimension is separated by a tab")
     parser.add_argument('--embedding-projector-metatada', action='store', nargs='+', dest='metatada_properties', default=[],
-                        help="""list of properties used to construct a metadata file for use in the Google Embedding Projector: http://projector.tensorflow.org. \n Default: the label and description of each node.""")
+                        help="""list of properties used to construct a metadata file for use in the Google Embedding Projector:
+                        http://projector.tensorflow.org.
\n Default: the label and description of each node.""") # black list file parser.add_argument('-b', '--black-list', nargs='+', action='store', dest='black_list_files', default="", @@ -891,6 +284,10 @@ def str2bool(v): parser.add_argument("--run-TSNE", type=str2bool, nargs='?', action='store', default=True, dest="run_TSNE", help="whether to run TSNE or not after the embedding, default is true.") + + parser.add_argument("--parallel", nargs='?', action='store', + default="1", dest="parallel_count", + help="How many processes to be run in same time, default is 1.") # cache config parser.add_argument("--use-cache", type=str2bool, nargs='?', action='store', default=False, dest="use_cache", @@ -906,7 +303,8 @@ def str2bool(v): # query server parser.add_argument("--query-server", nargs='?', action='store', default="", dest="query_server", - help="sparql query endpoint used for test_format input files, default is https://query.wikidata.org/sparql" + help="sparql query endpoint used for test_format input files, default is " + "https://query.wikidata.org/sparql " ) diff --git a/kgtk/gt/embedding_utils.py b/kgtk/gt/embedding_utils.py new file mode 100644 index 000000000..f4fb45f5f --- /dev/null +++ b/kgtk/gt/embedding_utils.py @@ -0,0 +1,676 @@ +import logging +import re +import redis +import typing +import hashlib +import pandas as pd # type: ignore +import numpy as np +import math +import pickle +import os +import time + +from pyrallel import ParallelProcessor +from sklearn.manifold import TSNE # type: ignore +from tqdm import tqdm # type: ignore +from ast import literal_eval +from sentence_transformers import SentenceTransformer, SentencesDataset, LoggingHandler, losses, models # type: ignore +from collections import defaultdict +from SPARQLWrapper import SPARQLWrapper, JSON, POST, URLENCODED # type: ignore +from kgtk.exceptions import KGTKException + + +class EmbeddingVector: + def __init__(self, model_name=None, query_server=None, cache_config: dict = None, parallel_count=1): + self._logger = logging.getLogger(__name__) + if model_name is None: + self.model_name = 'bert-base-nli-mean-tokens' + # xlnet need to be trained before using, we can't use this for now + # elif model_name == "xlnet-base-cased": + # word_embedding_model = models.XLNet('xlnet-base-cased') + # # Apply mean pooling to get one fixed sized sentence vector + # pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(), + # pooling_mode_mean_tokens=True, + # pooling_mode_cls_token=False, + # pooling_mode_max_tokens=False) + # self.model = SentenceTransformer(modules=[word_embedding_model, pooling_model]) + else: + self.model_name = model_name + self._logger.info("Using model {}".format(self.model_name)) + self.model = SentenceTransformer(self.model_name) + # setup redis cache server + if query_server is None or query_server == "": + self.wikidata_server = "https://query.wikidata.org/sparql" + else: + self.wikidata_server = query_server + if cache_config and cache_config.get("use_cache", False): + host = cache_config.get("host", "dsbox01.isi.edu") + port = cache_config.get("port", 6379) + self.redis_server = redis.Redis(host=host, port=port, db=0) + try: + _ = self.redis_server.get("foo") + self._logger.debug("Cache server {}:{} connected!".format(host, port)) + except: + self._logger.error("Cache server {}:{} is not able to be connected! 
Will not use cache!".format(host, port)) + self.redis_server = None + else: + self.redis_server = None + self._parallel_count = int(parallel_count) + self._logger.debug("Running with {} processes.".format(parallel_count)) + self.qnodes_descriptions = dict() + self.vectors_map = dict() + self.property_labels_dict = dict() + self.q_node_to_label = dict() + self.node_labels = dict() + self.vectors_2D = None + self.vector_dump_file = None + self.gt_nodes = set() + self.candidates = defaultdict(dict) + self.metadata = [] + self.gt_indexes = set() + self.input_format = "" + self.token_pattern = re.compile(r"(?u)\b\w\w+\b") + + def get_sentences_embedding(self, sentences: typing.List[str], qnodes: typing.List[str]): + """ + transform a list of sentences to embedding vectors + """ + + if self.redis_server is not None: + sentence_embeddings = [] + for each_node, each_sentence in zip(qnodes, sentences): + query_cache_key = each_node + each_sentence + if self.model_name != "bert-base-wikipedia-sections-mean-tokens": + query_cache_key += self.model_name + cache_res = self.redis_server.get(query_cache_key) + if cache_res is not None: + sentence_embeddings.append(literal_eval(cache_res.decode("utf-8"))) + # self._logger.error("{} hit!".format(each_node+each_sentence)) + else: + each_embedding = self.model.encode([each_sentence], show_progress_bar=False) + sentence_embeddings.extend(each_embedding) + self.redis_server.set(query_cache_key, str(each_embedding[0].tolist())) + else: + sentence_embeddings = self.model.encode(sentences, show_progress_bar=False) + return sentence_embeddings + + def send_sparql_query(self, query_body: str): + """ + a simple wrap to send the query and return the returned results + """ + qm = SPARQLWrapper(self.wikidata_server) + qm.setReturnFormat(JSON) + qm.setMethod(POST) + qm.setRequestMethod(URLENCODED) + self._logger.debug("Sent query is:") + self._logger.debug(str(query_body)) + qm.setQuery(query_body) + try: + results = qm.query().convert()['results']['bindings'] + return results + except Exception as e: + error_message = ("Sending Sparql query to {} failed!".format(self.wikidata_server)) + self._logger.error(error_message) + self._logger.debug(e, exc_info=True) + raise KGTKException(error_message) + + def _get_labels(self, nodes: typing.List[str]): + query_nodes = " ".join(["wd:{}".format(each) for each in nodes]) + query = """ + select ?item ?nodeLabel + where { + values ?item {""" + query_nodes + """} + ?item rdfs:label ?nodeLabel. + FILTER(LANG(?nodeLabel) = "en"). + } + """ + results2 = self.send_sparql_query(query) + for each_res in results2: + node_id = each_res['item']['value'].split("/")[-1] + value = each_res['nodeLabel']['value'] + self.node_labels[node_id] = value + + def _get_labels_and_descriptions(self, query_qnodes: str, need_find_label: bool, need_find_description: bool): + query_body = """ + select ?item ?itemDescription ?itemLabel + where { + values ?item {""" + query_qnodes + """ } + SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". 
} + } + """ + results = self.send_sparql_query(query_body) + for each in results: + each_node = each['item']['value'].split("/")[-1] + if 'itemDescription' in each: + description = each['itemDescription']['value'] + else: + description = "" + if "itemLabel" in each: + label = each['itemLabel']['value'] + else: + label = "" + if need_find_label: + self.candidates[each_node]["label_properties"] = [label] + if need_find_description: + self.candidates[each_node]["description_properties"] = [description] + + def _get_property_values(self, query_qnodes, query_part_names, query_part_properties): + used_p_node_ids = set() + for part_name, part in zip(query_part_names, query_part_properties): + if part_name == "isa_properties": + self._get_labels(part) + for i, each in enumerate(part): + if each not in {"label", "description", "all"}: + query_body2 = """ + select ?item ?eachPropertyLabel + where {{ + values ?item {{{all_nodes}}} + ?item wdt:{qnode} ?eachProperty. + SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }} + }} + """.format(all_nodes=query_qnodes, qnode=each) + results2 = self.send_sparql_query(query_body2) + + for each_res in results2: + node_id = each_res['item']['value'].split("/")[-1] + value = each_res['eachPropertyLabel']['value'] + if part_name == "isa_properties" and self.node_labels[each].endswith("of"): + value = self.node_labels[each] + "||" + value + used_p_node_ids.add(node_id) + if part_name in self.candidates[node_id]: + self.candidates[node_id][part_name].add(value) + else: + self.candidates[node_id][part_name] = {value} + return used_p_node_ids + + def _get_all_properties(self, query_qnodes, used_p_node_ids, properties_list): + has_properties_set = set(properties_list[3]) + query_body3 = """ + select DISTINCT ?item ?p_entity ?p_entityLabel + where { + values ?item {""" + query_qnodes + """} + ?item ?p ?o. + FILTER regex(str(?p), "^http://www.wikidata.org/prop/P", "i") + BIND (IRI(REPLACE(STR(?p), "http://www.wikidata.org/prop", "http://www.wikidata.org/entity")) AS ?p_entity) . + SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". 
} + } + """ + results3 = self.send_sparql_query(query_body3) + for each in results3: + node_name = each['item']['value'].split("/")[-1] + p_node_id = each['p_entity']['value'].split("/")[-1] + p_node_label = each['p_entityLabel']['value'] + if p_node_id not in used_p_node_ids: + if properties_list[3] == ["all"] or p_node_id in has_properties_set: + if "has_properties" in self.candidates[node_name]: + self.candidates[node_name]["has_properties"].add(p_node_label) + else: + self.candidates[node_name]["has_properties"] = {p_node_label} + + def get_item_description(self, qnodes: typing.List[str] = None, target_properties: dict = {}): + """ + use sparql query to get the descriptions of given Q nodes + """ + if qnodes is None: + qnodes = self.candidates + if "all" in target_properties: + find_all_properties = True + else: + find_all_properties = False + properties_list = [[] for _ in range(4)] + names = ["labels", "descriptions", "isa_properties", "has_properties"] + for k, v in target_properties.items(): + if v == "label_properties": + properties_list[0].append(k) + elif v == "description_properties": + properties_list[1].append(k) + elif v == "isa_properties": + properties_list[2].append(k) + elif v == "has_properties": + properties_list[3].append(k) + + hash_generator = hashlib.md5() + hash_generator.update(str(properties_list).encode('utf-8')) + properties_list_hash = "||" + str(hash_generator.hexdigest()) + + sentences_cache_dict = {} + if self.redis_server is not None: + for each_node in qnodes: + cache_key = each_node + properties_list_hash + cache_res = self.redis_server.get(cache_key) + self._logger.debug("Cached key is: {}".format(cache_key)) + if cache_res is not None: + self._logger.debug("Cache hitted {}".format(cache_key)) + sentences_cache_dict[each_node] = cache_res.decode("utf-8") + + self._logger.debug("Cached for those nodes {} / {}".format(len(sentences_cache_dict), len(qnodes))) + self._logger.debug(str(set(sentences_cache_dict.keys()))) + self._logger.debug("Need run query for those nodes {} / {}:".format(len(qnodes) - len(sentences_cache_dict), len(qnodes))) + + # we do not need to get those node again + if len(sentences_cache_dict) > 0: + qnodes = set(qnodes) - set(sentences_cache_dict.keys()) + self._logger.debug(str(qnodes)) + + # only need to do query when we still have remained nodes + if len(qnodes) > 0: + need_find_label = "label" in properties_list[0] + need_find_description = "description" in properties_list[1] + query_qnodes = "" + for each in qnodes: + query_qnodes += "wd:{} ".format(each) + + # this is used to get corresponding labels / descriptions + if need_find_label or need_find_description: + self._get_labels_and_descriptions(query_qnodes, need_find_label, need_find_description) + + if len(properties_list[3]) > len(qnodes): + # in this condition, we have too many properties need to be queried, it will waste time + # query to get all properties then filtering would save more times + find_all_properties = True + query_part2_names = names[:3] + query_part2_properties = properties_list[:3] + else: + query_part2_names = names + query_part2_properties = properties_list + # this is used to get corresponding labels of properties values + used_p_node_ids = self._get_property_values(query_qnodes, query_part2_names, query_part2_properties) + + # if need get all properties, we need to run extra query + if find_all_properties: + self._get_all_properties(query_qnodes, used_p_node_ids, properties_list) + + for each_node_id in qnodes: + each_sentence = 
self.attribute_to_sentence(self.candidates[each_node_id], each_node_id) + self.candidates[each_node_id]["sentence"] = each_sentence + # add to cache + if self.redis_server is not None: + response = self.redis_server.set(each_node_id + properties_list_hash, each_sentence) + if response: + self._logger.debug("Pushed cache for {} success.".format(each_node_id + properties_list_hash)) + + for each_node_id, sentence in sentences_cache_dict.items(): + self.candidates[each_node_id]["sentence"] = sentence + + def _process_one(self, args): + """ + one process for multiprocess calling + :param args: + :return: + """ + node_id = args["node_id"] + each_node_attributes = args["attribute"] + concated_sentence = self.attribute_to_sentence(each_node_attributes, node_id) + vectors = self.get_sentences_embedding([concated_sentence], [node_id])[0] + return {"v_" + node_id: vectors, "c_" + node_id: each_node_attributes} + + def _multiprocess_collector(self, data): + for k, v in data.items(): + if k.startswith("v_"): + k = k.replace("v_", "") + self.vectors_map[k] = v + else: + k = k.replace("c_", "") + self.candidates[k] = v + + def read_input(self, file_path: str, skip_nodes_set: set = None, + input_format: str = "kgtk_format", target_properties: dict = {}, + property_labels_dict: dict = {}, black_list_set: set = set() + ): + """ + load the input candidates files + """ + self.property_labels_dict = property_labels_dict + + if input_format == "test_format": + self.input_format = input_format + input_df = pd.read_csv(file_path) + gt = {} + count = 0 + if "GT_kg_id" in input_df.columns: + gt_column_id = "GT_kg_id" + elif "kg_id" in input_df.columns: + gt_column_id = "kg_id" + else: + raise KGTKException("Can't find ground truth id column! It should either named as `GT_kg_id` or `kg_id`") + + for _, each in input_df.iterrows(): + if isinstance(each["candidates"], str): + temp = str(each['candidates']).split("|") + elif each['candidates'] is np.nan or math.isnan(each['candidates']): + temp = [] + + to_remove_q = set() + if each[gt_column_id] is np.nan: + self._logger.warning("Ignore NaN gt value form {}".format(str(each))) + each[gt_column_id] = "" + gt_nodes = each[gt_column_id].split(" ") + label = str(each["label"]) + if len(gt_nodes) == 0: + self._logger.error("Skip a row with no ground truth node given: as {}".format(str(each))) + continue + if label == "": + self._logger.error("Skip a row with no label given: as {}".format(str(each))) + continue + temp.extend(gt_nodes) + + for each_q in temp: + self.q_node_to_label[each_q] = label + if skip_nodes_set is not None and each_q in skip_nodes_set: + to_remove_q.add(each_q) + temp = set(temp) - to_remove_q + count += len(temp) + self.gt_nodes.add(each[gt_column_id]) + self.get_item_description(temp, target_properties) + + self._logger.info("Totally {} rows with {} candidates loaded.".format(str(len(gt)), str(count))) + + elif input_format == "kgtk_format": + # assume the input edge file is sorted + if "all" in target_properties: + _ = target_properties.pop("all") + add_all_properties = True + else: + add_all_properties = False + + self.input_format = input_format + with open(file_path, "r") as f: + # get header + headers = f.readline().replace("\n", "").split("\t") + if len(headers) < 3: + raise KGTKException( + "No enough columns found on given input file. 
Only {} columns given but at least 3 needed.".format(
+                        len(headers)))
+                elif "node" in headers and "property" in headers and "value" in headers:
+                    column_references = {"node": headers.index("node"),
+                                         "property": headers.index("property"),
+                                         "value": headers.index("value")}
+                elif len(headers) == 3:
+                    column_references = {"node": 0,
+                                         "property": 1,
+                                         "value": 2}
+                else:
+                    missing_column = {"node", "property", "value"} - set(headers)
+                    raise KGTKException("Missing column {}".format(missing_column))
+                self._logger.debug("column index information: ")
+                self._logger.debug(str(column_references))
+                # read contents
+                each_node_attributes = {"has_properties": [], "isa_properties": [], "label_properties": [],
+                                        "description_properties": []}
+                current_process_node_id = None
+
+                if self._parallel_count > 1:
+                    pp = ParallelProcessor(self._parallel_count, self._process_one, collector=self._multiprocess_collector)
+                    pp.start()
+
+                for each_line in f:
+                    each_line = each_line.replace("\n", "").split("\t")
+                    node_id = each_line[column_references["node"]]
+                    node_property = each_line[column_references["property"]]
+                    node_value = each_line[column_references["value"]]
+                    # remove @ mark
+                    if "@" in node_value and node_value[0] != "@":
+                        node_value_org = node_value
+                        node_value = node_value[:node_value.index("@")]
+
+                    # remove extra double quote " and single quote '
+                    if node_value[0] == '"' and node_value[-1] == '"':
+                        node_value = node_value[1:-1]
+                    if node_value[0] == "'" and node_value[-1] == "'":
+                        node_value = node_value[1:-1]
+
+                    if current_process_node_id != node_id:
+                        if current_process_node_id is None:
+                            current_process_node_id = node_id
+                        else:
+                            # when we reach the next id, concatenate all properties into one sentence to represent the Q node
+
+                            # for multi process
+                            if self._parallel_count > 1:
+                                each_arg = {"node_id": current_process_node_id, "attribute": each_node_attributes}
+                                pp.add_task(each_arg)
+                            # for single process
+                            else:
+                                concated_sentence = self.attribute_to_sentence(each_node_attributes, current_process_node_id)
+                                each_node_attributes["sentence"] = concated_sentence
+                                self.candidates[current_process_node_id] = each_node_attributes
+
+                            # after writing finishes, clear the buffer and start parsing the next node
+                            each_node_attributes = {"has_properties": [], "isa_properties": [], "label_properties": [],
+                                                    "description_properties": []}
+                            # update to new id
+                            current_process_node_id = node_id
+
+                    if node_property in target_properties:
+                        each_node_attributes[target_properties[node_property]].append(node_value)
+                    if add_all_properties and each_line[column_references["value"]][0] == "P":
+                        each_node_attributes["has_properties"].append(node_value)
+
+                # close multiprocess pool
+                if self._parallel_count > 1:
+                    pp.task_done()
+                    pp.join()
+        else:
+            raise KGTKException("Unknown input format {}".format(input_format))
+
+        self._logger.info("Totally {} Q nodes loaded.".format(len(self.candidates)))
+        self.vector_dump_file = "dump_vectors_{}_{}.pkl".format(file_path[:file_path.rfind(".")], self.model_name)
+        # self._logger.debug("The cache file name will be {}".format(self.vector_dump_file))
+
+    def get_real_label_name(self, node):
+        if node in self.property_labels_dict:
+            return self.property_labels_dict[node]
+        else:
+            return node
+
+    def attribute_to_sentence(self, attribute_dict: dict, node_id=None):
+        concated_sentence = ""
+        have_isa_properties = False
+        # sort the properties to ensure the sentence is always the same
+        attribute_dict = {key: sorted(list(value)) for key, value in attribute_dict.items() if len(value)
> 0} + if "label_properties" in attribute_dict and len(attribute_dict["label_properties"]) > 0: + concated_sentence += self.get_real_label_name(attribute_dict["label_properties"][0]) + if "description_properties" in attribute_dict and len(attribute_dict["description_properties"]) > 0: + if concated_sentence != "" and attribute_dict["description_properties"][0] != "": + concated_sentence += ", " + concated_sentence += self.get_real_label_name(attribute_dict["description_properties"][0]) + if "isa_properties" in attribute_dict and len(attribute_dict["isa_properties"]) > 0: + have_isa_properties = True + temp = "" + for each in attribute_dict["isa_properties"]: + each = self.get_real_label_name(each) + if "||" in each: + if "instance of" in each: + each = each.split("||")[1] + else: + each = each.replace("||", " ") + temp += each + ", " + if concated_sentence != "" and temp != "": + concated_sentence += " is a " + elif concated_sentence == "": + concated_sentence += "It is a " + concated_sentence += temp[:-2] + if "has_properties" in attribute_dict and len(attribute_dict["has_properties"]) > 0: + temp = [self.get_real_label_name(each) for each in attribute_dict["has_properties"]] + if concated_sentence != "" and temp[0] != "": + if have_isa_properties: + concated_sentence += ", and has " + else: + concated_sentence += " has " + elif temp[0] != "": + concated_sentence += "It has " + concated_sentence += " and ".join(temp) + self._logger.debug("Transform node {} --> {}".format(node_id, concated_sentence)) + return concated_sentence + + def get_vectors(self): + """ + main function to get the vector representations of the descriptions + """ + if self._parallel_count == 1: + start_all = time.time() + self._logger.info("Now generating embedding vector.") + for q_node, each_item in tqdm(self.candidates.items()): + # do process for each row(one target) + sentence = each_item["sentence"] + if isinstance(sentence, bytes): + sentence = sentence.decode("utf-8") + vectors = self.get_sentences_embedding([sentence], [q_node]) + self.vectors_map[q_node] = vectors[0] + self._logger.info("Totally used {} seconds.".format(str(time.time() - start_all))) + else: + # Skip get vector function because we already get them + pass + + def dump_vectors(self, file_name, type_=None): + if file_name.endswith(".pkl"): + file_name = file_name.replace(".pkl", "") + if type_ == "2D": + with open(file_name + ".pkl", "wb") as f: + pickle.dump(self.vectors_2D, f) + dimension = len(self.vectors_2D[0]) + with open(file_name + ".tsv", "w") as f: + for each in self.vectors_2D: + for i, each_val in enumerate(each): + _ = f.write(str(each_val)) + if i != dimension - 1: + _ = f.write("\t") + _ = f.write("\n") + elif type_ == "metadata": + with open(file_name + "_metadata.tsv", "w") as f: + for each in self.metadata: + _ = f.write(each + "\n") + else: + with open(file_name + ".pkl", "wb") as f: + pickle.dump(self.vectors_map, f) + with open(file_name + ".tsv", "w") as f: + for each in self.vectors_map.values(): + for i in each: + _ = f.write(str(i) + "\t") + _ = f.write("\n") + + def print_vector(self, vectors, output_properties: str = "text_embedding", output_format="kgtk_format"): + if output_format == "kgtk_format": + print("node\tproperty\tvalue\n", end="") + if self.input_format == "kgtk_format": + for i, each_vector in enumerate(vectors): + print(str(list(self.candidates.keys())[i]) + "\t", end="") + print(output_properties + "\t", end="") + for j, each_dimension in enumerate(each_vector): + if j != len(each_vector) - 1: + 
print(str(each_dimension) + ",", end="") + else: + print(str(each_dimension) + "\n", end="") + elif self.input_format == "test_format": + all_nodes = list(self.vectors_map.keys()) + for i, each_vector in enumerate(vectors): + print(all_nodes[i] + "\t", end="") + print(output_properties + "\t", end="") + for j, each_dimension in enumerate(each_vector): + if j != len(each_vector) - 1: + print(str(each_dimension) + ",", end="") + else: + print(str(each_dimension) + "\n", end="") + + elif output_format == "tsv_format": + for each_vector in vectors: + for i, each_dimension in enumerate(each_vector): + if i != len(each_vector) - 1: + print(str(each_dimension) + "\t", end="") + else: + print(str(each_dimension) + "\n", end="") + + def plot_result(self, output_properties={}, input_format="kgtk_format", + output_uri: str = "", output_format="kgtk_format", + run_TSNE=True + ): + """ + transfer the vectors to lower dimension so that we can plot + Then save the 2D vector file for further purpose + """ + self.vectors_map = {k: v for k, v in sorted(self.vectors_map.items(), key=lambda item: item[0], reverse=True)} + vectors = list(self.vectors_map.values()) + # use TSNE to reduce dimension + if run_TSNE: + self._logger.warning("Start running TSNE to reduce dimension. It will take a long time.") + start = time.time() + self.vectors_2D = TSNE(n_components=2, random_state=0).fit_transform(vectors) + self._logger.info("Totally used {} seconds.".format(time.time() - start)) + + if input_format == "test_format": + gt_indexes = set() + vector_map_keys = list(self.vectors_map.keys()) + for each_node in self.gt_nodes: + gt_indexes.add(vector_map_keys.index(each_node)) + + self.metadata.append("Q_nodes\tType\tLabel\tDescription") + for i, each in enumerate(self.vectors_map.keys()): + label = self.q_node_to_label[each] + description = self.candidates[each]["sentence"] + if i in gt_indexes: + self.metadata.append("{}\tground_truth_node\t{}\t{}".format(each, label, description)) + else: + self.metadata.append("{}\tcandidates\t{}\t{}".format(each, label, description)) + self.gt_indexes = gt_indexes + + elif input_format == "kgtk_format": + if len(output_properties.get("metatada_properties", [])) == 0: + for k, v in self.candidates.items(): + label = v.get("label_properties", "") + if len(label) > 0 and isinstance(label, list): + label = label[0] + description = v.get("description_properties", "") + if len(description) > 0 and isinstance(description, list): + description = description[0] + self.metadata.append("{}\t\t{}\t{}".format(k, label, description)) + else: + required_properties = output_properties["metatada_properties"] + self.metadata.append("node\t" + "\t".join(required_properties)) + for k, v in self.candidates.items(): + each_metadata = k + "\t" + for each in required_properties: + each_metadata += v.get(each, " ") + "\t" + self.metadata.append(each_metadata) + + metadata_output_path = os.path.join(output_uri, self.vector_dump_file.split("/")[-1]) + if run_TSNE: + self.print_vector(self.vectors_2D, output_properties.get("output_properties"), output_format) + else: + self.print_vector(vectors, output_properties.get("output_properties"), output_format) + if output_uri != "none": + self.dump_vectors(metadata_output_path, "metadata") + + def evaluate_result(self): + """ + for the ground truth nodes, evaluate the average distance to the centroid, the lower the average distance, + the better clustering results should be + """ + centroid = None + gt_nodes_vectors = [] + if len(self.gt_indexes) == 0: + points = 
set(range(len(self.vectors_map)))
+        else:
+            points = self.gt_indexes
+        for i, each in enumerate(self.vectors_map.keys()):
+            if i in points:
+                if centroid is None:
+                    centroid = np.array(self.vectors_map[each])
+                else:
+                    centroid += np.array(self.vectors_map[each])
+                gt_nodes_vectors.append(self.vectors_map[each])
+        centroid = centroid / len(points)
+
+        distance_sum = 0
+        for each in gt_nodes_vectors:
+            distance_sum += self.calculate_distance(each, centroid)
+        self._logger.info("The average distance for the ground truth nodes to the centroid is {}".format(distance_sum / len(points)))
+
+    @staticmethod
+    def calculate_distance(a, b):
+        if len(a) != len(b):
+            raise KGTKException("Vector dimensions are different!")
+        dist = 0
+        for v1, v2 in zip(a, b):
+            dist += (v1 - v2) ** 2
+        dist = dist ** 0.5
+        return dist

From 91f7b03a716a0eccc7f41bac808539e4be173c80 Mon Sep 17 00:00:00 2001
From: ckxz105
Date: Mon, 4 May 2020 17:41:07 -0700
Subject: [PATCH 068/278] update embedding readme with parallel parameter

---
 kgtk/cli/text_embedding_README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kgtk/cli/text_embedding_README.md b/kgtk/cli/text_embedding_README.md
index d9e694a17..a05bf3ba7 100644
--- a/kgtk/cli/text_embedding_README.md
+++ b/kgtk/cli/text_embedding_README.md
@@ -23,6 +23,7 @@ kgtk text_embedding \
     --black-list/ -b # optional,default is None
     --logging-level/ -l \ # optional, default is `info`
     --run-TSNE False # optional, default is True
+    --parallel 4 # optional, default is 1
 ```
 ##### Example 1:
 For easiest running, just give the input file as
@@ -147,6 +148,9 @@ First column is the node name.
 Second column is the property name as required, default is `text_embedding`.
 Third column is the embeded vecotrs.
 
+##### parallel
+You can also set the parallel count to a number larger than 1 to run in multiprocess mode. Currently this is only supported for kgtk format input data. For example: `--parallel 4`
+
 ##### Reduced Embedding Vectors
 This will have embedded vectors values after running TSNE and reduced dimension to 2-dimensions for each Q nodes. This is used for visulization. (for example, you can view it at Google's online tools here: http://projector.tensorflow.org/)
 3. Metadata for the generated vectors: This will contains the metadata information for the Q nodes generated from 2 files mentioned above. It will contains the Q node value of each vector, the type (it is a `candidate` or a `ground truth` node), the given label of the Q node and corresponding fetched description information from wikidata.

From 94a4c100e7ee58befad1ecc27659d5d2d51a34b3 Mon Sep 17 00:00:00 2001
From: Craig Milo Rogers
Date: Mon, 4 May 2020 17:41:14 -0700
Subject: [PATCH 069/278] Process minimum and maximum year limits.
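The new --minimum-valid-year/--maximum-valid-year options flow through KgtkValueOptions into the
date/time validator. A minimal sketch of the resulting year-range check, assuming only the default
limits defined in KgtkValueOptions; the regular expression below is a simplified stand-in for the
full date-and-times pattern, not the code in kgtkvalue.py:

    import re

    MINIMUM_VALID_YEAR = 1583  # Per ISO 8601, years before this one require special agreement.
    MAXIMUM_VALID_YEAR = 2100  # Arbitrarily chosen.

    # Simplified stand-in: examine only the leading year of a KGTK
    # date-and-times value such as "^1960-11-05T00:00".
    year_re = re.compile(r"^\^(?P<year>[-+]?[0-9]{4})")

    def year_in_range(value: str,
                      minimum: int = MINIMUM_VALID_YEAR,
                      maximum: int = MAXIMUM_VALID_YEAR) -> bool:
        m = year_re.match(value)
        if m is None:
            return False  # Years are mandatory in date-and-times values.
        return minimum <= int(m.group("year")) <= maximum

    assert year_in_range("^1960-11-05T00:00")
    assert not year_in_range("^1492-10-12T00:00")  # Below the default floor.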
--- kgtk/cli/validate.py | 6 +++- kgtk/join/kgtkvalue.py | 52 +++++++++++++++++++++++++++++++---- kgtk/join/kgtkvalueoptions.py | 27 ++++++++++++++---- 3 files changed, 74 insertions(+), 11 deletions(-) diff --git a/kgtk/cli/validate.py b/kgtk/cli/validate.py index a823bcb5a..074f56aed 100644 --- a/kgtk/cli/validate.py +++ b/kgtk/cli/validate.py @@ -153,6 +153,8 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], allow_lax_strings: bool = False, allow_lax_lq_strings: bool = False, allow_month_or_day_zero: bool = False, + minimum_valid_year: int = KgtkValueOptions.MINIMUM_VALID_YEAR, + maximum_valid_year: int = KgtkValueOptions.MAXIMUM_VALID_YEAR, compression_type: typing.Optional[str] = None, gzip_in_parallel: bool = False, gzip_queue_size: int = KgtkReader.GZIP_QUEUE_SIZE_DEFAULT, @@ -176,7 +178,9 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], allow_lax_strings=allow_lax_strings, allow_lax_lq_strings=allow_lax_lq_strings, allow_language_suffixes=allow_language_suffixes, - additional_language_codes=additional_language_codes) + additional_language_codes=additional_language_codes, + minimum_valid_year=minimum_valid_year, + maximum_valid_year=maximum_valid_year) try: kgtk_file: typing.Optional[Path] diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index 2d95bd4aa..7f86a79f7 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -448,11 +448,40 @@ def is_date_and_times(self, idx: typing.Optional[int] = None)->bool: v: str = self.get_item(idx) return v.startswith("^") - # This pattern allows month 00 and day 00, which are excluded by ISO 8601. - lax_date_and_times_re: typing.Pattern = re.compile(r"^\^(?P[0-9]{4})(?:(?P-)?(?P1[0-2]|0[0-9])(?:(?(hyphen)-)(?P3[01]|0[0-9]|[12][0-9])))T(?P2[0-3]|[01][0-9])(?:(?(hyphen):)(?P[0-5][0-9])(?:(?(hyphen):)(?P[0-5][0-9])))(?PZ|\[-+][0-9][0-9](?::[0-9][0-9])?)?(?P/[0-1]?[0-9])?$") + year_pat: str = r'(?P[-+]?[0-9]{4})' + + hour_pat: str = r'(?P2[0-3]|[01][0-9])' + minutes_pat: str = r'(?P[0-5][0-9])' + seconds_pat: str = r'(?P[0-5][0-9])' + zone_pat: str = r'(?PZ|\[-+][0-9][0-9](?::[0-9][0-9]))' + time_pat: str = r'(?:{hour}(?:(?(hyphen):){minutes}(?:(?(hyphen):){seconds})?)?{zone}?)'.format(hour=hour_pat, + minutes=minutes_pat, + seconds=seconds_pat, + zone=zone_pat) - strict_date_and_times_re: typing.Pattern = re.compile(r"^\^(?P[0-9]{4})(?:(?P-)?(?P1[0-2]|0[1-9])(?:(?(hyphen)-)(?P3[01]|0[1-9]|[12][0-9])))T(?P2[0-3]|[01][0-9])(?:(?(hyphen):)(?P[0-5][0-9])(?:(?(hyphen):)(?P[0-5][0-9])))(?PZ|\[-+][0-9][0-9](?::[0-9][0-9])?)?(?P/[0-1]?[0-9])?$") + precision_pat: str = r'(?P/[0-1]?[0-9])' + # This pattern allows month 00 and day 00, which are excluded by ISO 8601. 
+ lax_month_pat: str = r'(?P1[0-2]|0[0-9])' + lax_day_pat: str = r'(?P3[01]|0[0-9]|[12][0-9])' + lax_date_pat: str = r'(?:{year}(?:(?P-)?{month}?(?:(?(hyphen)-){day})?)?)'.format(year=year_pat, + month=lax_month_pat, + day=lax_day_pat) + lax_date_and_times_pat: str = r'(?:\^{date}(?:T{time}{precision}?)?)'.format(date=lax_date_pat, + time=time_pat, + precision=precision_pat) + lax_date_and_times_re: typing.Pattern = re.compile(r'^{date_and_times}$'.format(date_and_times=lax_date_and_times_pat)) + + strict_month_pat: str = r'(?P1[0-2]|0[1-9])' + strict_day_pat: str = r'(?P3[01]|0[1-9]|[12][0-9])' + strict_date_pat: str = r'(?:{year}(?:(?P-)?{month}?(?:(?(hyphen)-){day})?)?)'.format(year=year_pat, + month=strict_month_pat, + day=strict_day_pat) + strict_date_and_times_pat: str = r'(?:\^{date}(?:T{time}{precision}?)?)'.format(date=strict_date_pat, + time=time_pat, + precision=precision_pat) + strict_date_and_times_re: typing.Pattern = re.compile(r'^{date_and_times}$'.format(date_and_times=strict_date_and_times_pat)) + def is_valid_date_and_times(self, idx: typing.Optional[int] = None)->bool: """ Return False if this value is a list and idx is None. @@ -466,7 +495,7 @@ def is_valid_date_and_times(self, idx: typing.Optional[int] = None)->bool: YYYY-MM-DD Valid date and time formats - YYMMDDTHH + YYYYMMDDTHH YYYY-MM-DDTHH YYMMDDTHHMM YYYY-MM-DDTHH:MM @@ -511,7 +540,20 @@ def is_valid_date_and_times(self, idx: typing.Optional[int] = None)->bool: m = KgtkValue.lax_date_and_times_re.match(v) else: m = KgtkValue.strict_date_and_times_re.match(v) - return m is not None + if m is None: + return False + year_str: str = m.group("year") + if year_str is None or len(year_str) == 0: + return False + try: + year: int = int(year_str) + except ValueError: + return False + if year < self.options.minimum_valid_year: + return False + if year > self.options.maximum_valid_year: + return False + return True def is_extension(self, idx: typing.Optional[int] = None)->bool: """ diff --git a/kgtk/join/kgtkvalueoptions.py b/kgtk/join/kgtkvalueoptions.py index cd07e5aa0..c6383e793 100644 --- a/kgtk/join/kgtkvalueoptions.py +++ b/kgtk/join/kgtkvalueoptions.py @@ -13,6 +13,10 @@ class KgtkValueOptions: seperate class for efficiency. """ + # The default minimum and maximum valid year values. + MINIMUM_VALID_YEAR: int = 1583 # Per ISO 8601, years before this one require special agreement. + MAXIMUM_VALID_YEAR: int = 2100 # Arbitrarily chosen. + # Allow month 00 or day 00 in dates? This isn't really allowed by ISO # 8601, but appears in wikidata. allow_month_or_day_zero: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) @@ -31,10 +35,15 @@ class KgtkValueOptions: # If this list gets long, we may want to turn it into a map to make lookup # more efficient. - additional_language_codes: typing.Optional[typing.List[str]] = attr.ib(validator=attr.validators.optional(attr.validators.deep_iterable(member_validator=attr.validators.instance_of(str), - iterable_validator=attr.validators.instance_of(list))), - default=None) - + # + # TODO: fix this validation + # additional_language_codes: typing.Optional[typing.List[str]] = attr.ib(validator=attr.validators.deep_iterable(member_validator=attr.validators.instance_of(str), + # iterable_validator=attr.validators.instance_of(list)))), + additional_language_codes: typing.Optional[typing.List[str]] = attr.ib(default=None) + + # Minimum and maximum year range in dates. 
+ minimum_valid_year: int = attr.ib(validator=attr.validators.instance_of(int), default=MINIMUM_VALID_YEAR) + maximum_valid_year: int = attr.ib(validator=attr.validators.instance_of(int), default=MAXIMUM_VALID_YEAR) @classmethod def add_arguments(cls, parser: ArgumentParser): @@ -69,6 +78,12 @@ def add_arguments(cls, parser: ArgumentParser): md0group.add_argument( "--disallow-month-or-day-zero", dest="allow_month_or_day_zero", help="Allow month or day zero in dates.", action='store_false') + parser.add_argument( "--minimum-valid-year", dest="minimum_valid_year", + help="The minimum valid year in dates.", type=int, default=cls.MINIMUM_VALID_YEAR) + + parser.add_argument( "--maximum-valid-year", dest="maximum_valid_year", + help="The maximum valid year in dates.", type=int, default=cls.MAXIMUM_VALID_YEAR) + @classmethod # Build the value parsing option structure. def from_args(cls, args: Namespace)->'KgtkValueOptions': @@ -76,7 +91,9 @@ def from_args(cls, args: Namespace)->'KgtkValueOptions': allow_language_suffixes=args.allow_language_suffixes, allow_lax_strings=args.allow_lax_strings, allow_lax_lq_strings=args.allow_lax_lq_strings, - additional_language_codes=args.additional_language_codes) + additional_language_codes=args.additional_language_codes, + minimum_valid_year=args.minimum_valid_year, + maximum_valid_year=args.maximum_valid_year) DEFAULT_KGTK_VALUE_OPTIONS: KgtkValueOptions = KgtkValueOptions() From 479c650b603cc718f1aa4845796f5d03a7f7d745 Mon Sep 17 00:00:00 2001 From: saggu Date: Mon, 4 May 2020 18:00:06 -0700 Subject: [PATCH 070/278] update order of columns --- kgtk/cli/gt_loader.py | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/kgtk/cli/gt_loader.py b/kgtk/cli/gt_loader.py index b78167725..00fa84404 100644 --- a/kgtk/cli/gt_loader.py +++ b/kgtk/cli/gt_loader.py @@ -9,6 +9,20 @@ def parser(): } +def convert_scientific_notation(num): + if isinstance(num, float): + num = str(num) + if 'e' in num: + vals = num.split('e') + formatter = int(vals[1].replace('-', '')) + 2 + try: + return "{:.{formatter}f}".format(float(num), formatter=formatter) + except: + print(num, vals, formatter) + raise + return num + + def add_arguments(parser): """ Parse arguments @@ -136,16 +150,15 @@ def infer_predicate(h, options=[]): for n_id, n_label, authority in main_auth: writer.write('%s\t%s\t%f\n' % (n_id, n_label, authority)) - sys.stdout.write('id\tnode1\tproperty\tnode2\n') + sys.stdout.write('node1\tproperty\tnode2\tid\n') id_count = 0 if not output_stats: for e in G2.edges(): sid, oid = e lbl = G2.ep[predicate][e] sys.stdout.write( - '%s\t%s\t%s\t%s\n' % ( - '{}-{}-{}'.format(G2.vp[id_col][sid], lbl, id_count), G2.vp[id_col][sid], lbl, - G2.vp[id_col][oid])) + '%s\t%s\t%s\t%s\n' % (G2.vp[id_col][sid], lbl, G2.vp[id_col][oid], + '{}-{}-{}'.format(G2.vp[id_col][sid], lbl, id_count))) id_count += 1 id_count = 0 @@ -153,20 +166,19 @@ def infer_predicate(h, options=[]): v_id = G2.vp[id_col][v] sys.stdout.write( - '{}\t{}\t{}\t{}\n'.format('{}-{}-{}'.format(v_id, vertex_in_degree, id_count), v_id, - vertex_in_degree, v.in_degree())) + '{}\t{}\t{}\t{}\n'.format(v_id, vertex_in_degree, v.in_degree(), + '{}-{}-{}'.format(v_id, vertex_in_degree, id_count))) id_count += 1 sys.stdout.write( - '{}\t{}\t{}\t{}\n'.format('{}-{}-{}'.format(v_id, vertex_out_degree, id_count), v_id, - vertex_out_degree, v.out_degree())) + '{}\t{}\t{}\t{}\n'.format(v_id, vertex_out_degree, v.out_degree(), + '{}-{}-{}'.format(v_id, vertex_out_degree, id_count))) id_count += 1 
for vprop in G2.vertex_properties.keys(): if vprop == id_col: continue sys.stdout.write( - '%s\t%s\t%s\t%s\n' % ( - '{}-{}-{}'.format(v_id, v_prop_dict[vprop], id_count), v_id, v_prop_dict[vprop], - G2.vp[vprop][v])) + '%s\t%s\t%s\t%s\n' % (v_id, v_prop_dict[vprop], convert_scientific_notation(G2.vp[vprop][v]), + '{}-{}-{}'.format(v_id, v_prop_dict[vprop], id_count))) id_count += 1 if output: From 5c11c71b3f02ac3c3fd62ca3f6ba05b9887e46b6 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Mon, 4 May 2020 19:10:30 -0700 Subject: [PATCH 071/278] Improve coordinates and date/time parsing. --- kgtk/join/kgtkvalue.py | 85 ++++++++++++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 29 deletions(-) diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index 7f86a79f7..df5b0cd16 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -400,7 +400,8 @@ def is_location_coordinates(self, idx: typing.Optional[int] = None)->bool: return v.startswith("@") #location_coordinates_re: typing.Pattern = re.compile(r"^@(?P[-+]?\d{3}\.\d{5})/(?P[-+]?\d{3}\.\d{5})$") - location_coordinates_re: typing.Pattern = re.compile(r"^@(?P[-+]?(?:\d+(?:\.\d*)?)|(?:\.\d+))/(?P[-+]?(?:\d+(?:\.\d*)?)|(?:\.\d+))$") + degrees_pat: str = r'(?:[-+]?(?:\d+(?:\.\d*)?)|(?:\.\d+))' + location_coordinates_re: typing.Pattern = re.compile(r'^@(?P{degrees})/(?P{degrees})$'.format(degrees=degrees_pat)) def is_valid_location_coordinates(self, idx: typing.Optional[int] = None)->bool: """ @@ -448,40 +449,49 @@ def is_date_and_times(self, idx: typing.Optional[int] = None)->bool: v: str = self.get_item(idx) return v.startswith("^") + # https://en.wikipedia.org/wiki/ISO_8601 + # + # The "lax" patterns allow month 00 and day 00, which are excluded by ISO 8601. + # We will allow those values when requested in the code below. + # + # The first possible hyphen position determines whether we will parse in + # value as a "basic" (no hyphen) or "extended" format date/time. A + # mixture is not permitted: either all hyphens (colons in the time + # section) must be present, or none. + # + # Year-month-day year_pat: str = r'(?P[-+]?[0-9]{4})' - + lax_month_pat: str = r'(?P1[0-2]|0[0-9])' + lax_day_pat: str = r'(?P3[01]|0[0-9]|[12][0-9])' + lax_date_pat: str = r'(?:{year}(?:(?P-)?{month}?(?:(?(hyphen)-){day})?)?)'.format(year=year_pat, + month=lax_month_pat, + day=lax_day_pat) + # hour-minutes-seconds hour_pat: str = r'(?P2[0-3]|[01][0-9])' minutes_pat: str = r'(?P[0-5][0-9])' seconds_pat: str = r'(?P[0-5][0-9])' - zone_pat: str = r'(?PZ|\[-+][0-9][0-9](?::[0-9][0-9]))' + + # NOTE: It might be the case that the ":" before the minutes in the time zone pattern + # should be conditioned upon the hyphen indicator. The Wikipedia article doesn't + # mention this requirement. + # + # NOTE: This pattern accepts a wider range of offsets than actually occur. + # + # TODO: consult the actual standard about the colon. + zone_pat: str = r'(?PZ|[-+][01][0-9](?::?[0-5][0-9])?)' + time_pat: str = r'(?:{hour}(?:(?(hyphen):){minutes}(?:(?(hyphen):){seconds})?)?{zone}?)'.format(hour=hour_pat, minutes=minutes_pat, seconds=seconds_pat, zone=zone_pat) - precision_pat: str = r'(?P/[0-1]?[0-9])' + precision_pat: str = r'(?P[0-1]?[0-9])' - # This pattern allows month 00 and day 00, which are excluded by ISO 8601. 
- lax_month_pat: str = r'(?P1[0-2]|0[0-9])' - lax_day_pat: str = r'(?P3[01]|0[0-9]|[12][0-9])' - lax_date_pat: str = r'(?:{year}(?:(?P-)?{month}?(?:(?(hyphen)-){day})?)?)'.format(year=year_pat, - month=lax_month_pat, - day=lax_day_pat) - lax_date_and_times_pat: str = r'(?:\^{date}(?:T{time}{precision}?)?)'.format(date=lax_date_pat, - time=time_pat, - precision=precision_pat) + lax_date_and_times_pat: str = r'(?:\^{date}(?:T{time})?(?:/{precision})?)'.format(date=lax_date_pat, + time=time_pat, + precision=precision_pat) lax_date_and_times_re: typing.Pattern = re.compile(r'^{date_and_times}$'.format(date_and_times=lax_date_and_times_pat)) - strict_month_pat: str = r'(?P1[0-2]|0[1-9])' - strict_day_pat: str = r'(?P3[01]|0[1-9]|[12][0-9])' - strict_date_pat: str = r'(?:{year}(?:(?P-)?{month}?(?:(?(hyphen)-){day})?)?)'.format(year=year_pat, - month=strict_month_pat, - day=strict_day_pat) - strict_date_and_times_pat: str = r'(?:\^{date}(?:T{time}{precision}?)?)'.format(date=strict_date_pat, - time=time_pat, - precision=precision_pat) - strict_date_and_times_re: typing.Pattern = re.compile(r'^{date_and_times}$'.format(date_and_times=strict_date_and_times_pat)) - def is_valid_date_and_times(self, idx: typing.Optional[int] = None)->bool: """ Return False if this value is a list and idx is None. @@ -535,16 +545,14 @@ def is_valid_date_and_times(self, idx: typing.Optional[int] = None)->bool: return False v: str = self.get_item(idx) - m: typing.Optional[typing.Match] - if self.options.allow_month_or_day_zero: - m = KgtkValue.lax_date_and_times_re.match(v) - else: - m = KgtkValue.strict_date_and_times_re.match(v) + m: typing.Optional[typing.Match] = KgtkValue.lax_date_and_times_re.match(v) if m is None: return False + + # Validate the year: year_str: str = m.group("year") if year_str is None or len(year_str) == 0: - return False + return False # Years are mandatory try: year: int = int(year_str) except ValueError: @@ -553,6 +561,25 @@ def is_valid_date_and_times(self, idx: typing.Optional[int] = None)->bool: return False if year > self.options.maximum_valid_year: return False + + month_str: str = m.group("month") + if month_str is not None: + try: + month: int = int(month_str) + except ValueError: + return False # shouldn't happen + if month == 0 and not self.options.allow_month_or_day_zero: + return False # month 0 was disallowed. + + day_str: str = m.group("day") + if day_str is not None: + try: + day: int = int(day_str) + except ValueError: + return False # shouldn't happen + if day == 0 and not self.options.allow_month_or_day_zero: + return False # day 0 was disallowed. + return True def is_extension(self, idx: typing.Optional[int] = None)->bool: From a32637202cb8354d243646b72e7892274cfa7bc0 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Mon, 4 May 2020 19:20:32 -0700 Subject: [PATCH 072/278] Prepare the min/max lat/lon values for override. Describe lists a little differently. --- kgtk/join/kgtkvalue.py | 12 ++++++------ kgtk/join/kgtkvalueoptions.py | 17 +++++++++++++---- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index df5b0cd16..56939b949 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -418,20 +418,20 @@ def is_valid_location_coordinates(self, idx: typing.Optional[int] = None)->bool: if m is None: return False - # Latitude runs from -90 to +90 + # Latitude normally runs from -90 to +90: latstr: str = m.group("lat") try: lat: float = float(latstr) - if lat < -90. 
or lat > 90.: + if lat < self.options.minimum_valid_lat or lat > self.options.maximum_valid_lat: return False except ValueError: return False - # Longitude runs from -180 to +180 + # Longitude normally runs from -180 to +180: lonstr: str = m.group("lon") try: lon: float = float(lonstr) - if lon < -180. or lon > 180.: + if lon < self.options.minimum_valid_lon or lon > self.options.maximum_valid_lon: return False except ValueError: return False @@ -647,7 +647,7 @@ def describe(self, idx: typing.Optional[int] = None)->str: Return a string that describes the value. """ if self.is_list() and idx is None: - result: str = "" + result: str = "List (" kv: KgtkValue first: bool = True for kv in self.get_values(): @@ -656,7 +656,7 @@ def describe(self, idx: typing.Optional[int] = None)->str: else: result += KgtkFormat.LIST_SEPARATOR result += kv.describe() - return result + return result + ")" if self.is_empty(idx): return "Empty" diff --git a/kgtk/join/kgtkvalueoptions.py b/kgtk/join/kgtkvalueoptions.py index c6383e793..19252f3d9 100644 --- a/kgtk/join/kgtkvalueoptions.py +++ b/kgtk/join/kgtkvalueoptions.py @@ -13,10 +13,6 @@ class KgtkValueOptions: seperate class for efficiency. """ - # The default minimum and maximum valid year values. - MINIMUM_VALID_YEAR: int = 1583 # Per ISO 8601, years before this one require special agreement. - MAXIMUM_VALID_YEAR: int = 2100 # Arbitrarily chosen. - # Allow month 00 or day 00 in dates? This isn't really allowed by ISO # 8601, but appears in wikidata. allow_month_or_day_zero: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) @@ -42,9 +38,22 @@ class KgtkValueOptions: additional_language_codes: typing.Optional[typing.List[str]] = attr.ib(default=None) # Minimum and maximum year range in dates. + MINIMUM_VALID_YEAR: int = 1583 # Per ISO 8601, years before this one require special agreement. minimum_valid_year: int = attr.ib(validator=attr.validators.instance_of(int), default=MINIMUM_VALID_YEAR) + MAXIMUM_VALID_YEAR: int = 2100 # Arbitrarily chosen. maximum_valid_year: int = attr.ib(validator=attr.validators.instance_of(int), default=MAXIMUM_VALID_YEAR) + MINIMUM_VALID_LAT: float = -90. + minimum_valid_lat: float = attr.ib(validator=attr.validators.instance_of(float), default=MINIMUM_VALID_LAT) + MAXIMUM_VALID_LAT: float = 90. + maximum_valid_lat: float = attr.ib(validator=attr.validators.instance_of(float), default=MAXIMUM_VALID_LAT) + + MINIMUM_VALID_LON: float = -180. + minimum_valid_lon: float = attr.ib(validator=attr.validators.instance_of(float), default=MINIMUM_VALID_LON) + MAXIMUM_VALID_LON: float = 180. + maximum_valid_lon: float = attr.ib(validator=attr.validators.instance_of(float), default=MAXIMUM_VALID_LON) + + @classmethod def add_arguments(cls, parser: ArgumentParser): parser.add_argument( "--additional-language-codes", dest="additional_language_codes", From 991e486b473d81f4a31809c99741479df8abb2a5 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Mon, 4 May 2020 21:30:58 -0700 Subject: [PATCH 073/278] New set of datatypes. 
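The old four-member DataTypes enum is replaced by an eleven-member DataType enum that names every
KGTK value shape directly. A minimal sketch of how such an enum can key value classification; the
rough_data_type function below is hypothetical, keying only on the sigil that begins each KGTK
value, while the real dispatch logic arrives with the KgtkValue refactor in the following patch:

    from enum import Enum

    class DataType(Enum):
        EMPTY = 0
        LIST = 1
        NUMBER = 2
        QUANTITY = 3
        STRING = 4
        LANGUAGE_QUALIFIED_STRING = 5
        LOCATION_COORDINATES = 6
        DATE_AND_TIMES = 7
        EXTENSION = 8
        BOOLEAN = 9
        SYMBOL = 10

    def rough_data_type(value: str) -> DataType:
        # Hypothetical classifier keyed on the sigil that starts each value;
        # lists, quantities, and escaped strings need deeper inspection than this.
        if len(value) == 0:
            return DataType.EMPTY
        if value.startswith('"'):
            return DataType.STRING
        if value.startswith("'"):
            return DataType.LANGUAGE_QUALIFIED_STRING
        if value.startswith("@"):
            return DataType.LOCATION_COORDINATES
        if value.startswith("^"):
            return DataType.DATE_AND_TIMES
        if value.startswith("!"):
            return DataType.EXTENSION
        if value[0] in "+-.0123456789":
            return DataType.NUMBER  # could still be a QUANTITY on deeper inspection
        if value in ("True", "False"):
            return DataType.BOOLEAN
        return DataType.SYMBOL

    assert rough_data_type("@043.26193/010.92708") is DataType.LOCATION_COORDINATES
    assert rough_data_type("12.3") is DataType.NUMBER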
--- kgtk/join/kgtkformat.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/kgtk/join/kgtkformat.py b/kgtk/join/kgtkformat.py index 6c392fe82..036d81e4b 100644 --- a/kgtk/join/kgtkformat.py +++ b/kgtk/join/kgtkformat.py @@ -20,11 +20,18 @@ class KgtkFormat: # There is only one required column in a node file: ID_COLUMN_NAMES: typing.List[str] = ["id", "ID"] - class DataTypes(Enum): - NUMBER = 0 - STRING = 1 - STRUCTURED_LITERAL = 2 - SYMBOL = 3 + class DataType(Enum): + EMPTY = 0 + LIST = 1 + NUMBER = 2 + QUANTITY = 3 + STRING = 4 + LANGUAGE_QUALIFIED_STRING = 5 + LOCATION_COORDINATES = 6 + DATE_AND_TIMES = 7 + EXTENSION = 8 + BOOLEAN = 9 + SYMBOL = 10 TRUE_SYMBOL: str = "True" FALSE_SYMBOL: str = "False" From fda2f128a352417f454f2e0ab885a6da2e468516 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Tue, 5 May 2020 14:49:51 -0700 Subject: [PATCH 074/278] Refactored value tests. --- kgtk/join/kgtkvalue.py | 598 ++++++++++++++++++++++------------------- 1 file changed, 326 insertions(+), 272 deletions(-) diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index 56939b949..e1fb06f27 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -17,107 +17,145 @@ class KgtkValue(KgtkFormat): value: str = attr.ib(validator=attr.validators.instance_of(str)) options: KgtkValueOptions = attr.ib(validator=attr.validators.instance_of(KgtkValueOptions), default=DEFAULT_KGTK_VALUE_OPTIONS) - split_list_re: typing.Pattern = re.compile(r"(?typing.List[str]: - if self.values is None: - self.values = KgtkValue.split_list_re.split(self.value) - return self.values + def get_data_type(self)->KgtkFormat.DataType: - def get_item(self, idx: typing.Optional[int])-> str: - if idx is None: - return self.value - else: - return self.get_list()[idx] + if self.data_type is not None: + pass - def is_list(self)->bool: - return len(self.get_list()) > 1 + elif self.is_empty() or self.is_list(): + pass - def get_values(self)->typing.List['KgtkValue']: - """ - Convert the value into a list of KgtkValues. - """ - if not self.is_list: - return [ self ] + elif self.is_string() or self.is_language_qualified_string(): + pass + + elif self.is_number_or_quantity(): + # To determine whether this is a number or a quantity, we have + # to validate one of them. + if not self.is_valid_number(): + # If it isn't a valid number, assume it's a quantity. + self.data_type = KgtkFormat.DataType.QUANTITY + + elif self.is_location_coordinates(): + pass + + elif self.is_date_and_times(): + pass + + elif self.is_extension(): + pass + + elif self.is_boolean() or self.is_symbol(): + pass + + if self.data_type is not None: + return self.data_type + + # Shouldn't get here. 
+ raise ValueError("Unknown data type for '%s'" % self.value) + + def is_valid(self)->bool: + dt: KgtkFormat.DataType = self.get_data_type() + if dt == KgtkFormat.DataType.EMPTY: + return self.is_valid_empty() + elif dt == KgtkFormat.DataType.LIST: + return self.is_valid_list() + elif dt == KgtkFormat.DataType.NUMBER: + return self.is_valid_number() + elif dt == KgtkFormat.DataType.QUANTITY: + return self.is_valid_quantity() + elif dt == KgtkFormat.DataType.STRING: + return self.is_valid_string() + elif dt == KgtkFormat.DataType.LANGUAGE_QUALIFIED_STRING: + return self.is_valid_language_qualified_string() + elif dt == KgtkFormat.DataType.LOCATION_COORDINATES: + return self.is_valid_location_coordinates() + elif dt == KgtkFormat.DataType.DATE_AND_TIMES: + return self.is_valid_date_and_times() + elif dt == KgtkFormat.DataType.EXTENSION: + return self.is_valid_extension() + elif dt == KgtkFormat.DataType.BOOLEAN: + return self.is_valid_boolean() + elif dt == KgtkFormat.DataType.SYMBOL: + return self.is_valid_symbol() else: - result: typing.List['KgtkValue'] = [ ] - v: str - for v in self.get_list(): - result.append(KgtkValue(v, options=self.options)) - return result + raise ValueError("Unrecognized DataType.") - def is_empty(self, idx: typing.Optional[int] = None)->bool: - """ - Return False if this value is a list and idx is None. - Otherwise, return True if the value is empty. - """ - if self.is_list() and idx is None: - return False - v: str = self.get_item(idx) - return len(v) == 0 + def is_empty(self)->bool: + if self.data_type is not None: + return self.data_type == KgtkFormat.DataType.EMPTY - def is_number_old(self, idx: typing.Optional[int] = None)->bool: - """ - Return False if this value is a list and idx is None. - Otherwise, return True if the first character is 0-9,+,-,. . - """ - if self.is_list() and idx is None: + if len(self.value) != 0: return False - - v: str = self.get_item(idx) - return v.startswith(("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "+", "-", ".")) - - def is_valid_number_old(self, idx: typing.Optional[int] = None)->bool: - """ - Return False if this value is a list and idx is None. - Otherwise, return True if the first character is 0-9,_,-,. - and Python can parse it. - Examples: - 1 - 123 - -123 - +123 - 0b101 - 0o277 - 0x24F - .4 - 0.4 - 10. - 10.4 - 10.4e10 - """ - if self.is_list() and idx is None: + # We are certain that this is an empty value. We can be certain it is valid. + self.data_type = KgtkFormat.DataType.EMPTY + self.valid = True + return True + + def is_valid_empty(self)->bool: + # If it is empty, it is validly so. + return self.is_empty() + + split_list_re: typing.Pattern = re.compile(r"(?typing.List['KgtkValue']: + if self.list_items is not None: + return self.list_items + + self.list_items: typing.List['KgtkValue'] = [ ] + value: str + for value in KgtkValue.split_list_re.split(self.value): + self.list_items.append(KgtkValue(value, options=self.options)) + return self.list_items + + def is_list(self)->bool: + if self.data_type is not None: + return self.data_type == KgtkFormat.DataType.LIST + + if len(self.get_list()) == 1: return False - - v: str = self.get_item(idx) - if not v.startswith(("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "+", "-", ".")): + + # We aare certain that this is a list, although we haven't checked validity. + self.data_type = KgtkFormat.DataType.LIST + return True + + + def is_valid_list(self)->bool: + if not self.is_list(): return False - try: - i: int = int(v, 0) # The 0 allows prefixes: 0b, 0o, and 0x. 
- return True - except ValueError: - try: - f: float = float(v) - return True - except ValueError: + + if self.valid is not None: + return self.valid + + item: 'KgtkValue' + for item in self.get_list(): + if not item.is_valid(): + # The list is invalid if any item in the list is invalid. + self.valid = False return False - - - def is_number_or_quantity(self, idx: typing.Optional[int] = None)->bool: + + # This is a valid list. + self.valid = True + return True + + def _is_number_or_quantity(self)->bool: + return self.value.startswith(("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "+", "-", ".")) + + def is_number_or_quantity(self)->bool: """ - Return False if this value is a list and idx is None. Otherwise, return True if the first character is 0-9,+,-,. . """ - if self.is_list() and idx is None: - return False - - v: str = self.get_item(idx) - return v.startswith(("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "+", "-", ".")) + if self.data_type is not None: + return self.data_type == KgtkFormat.DataType.NUMBER or self.data_type == KgtkFormat.DataType.QUANTITY + + return self._is_number_or_quantity() # The following lexical analysis is based on: # https://docs.python.org/3/reference/lexical_analysis.html @@ -210,27 +248,33 @@ def is_number_or_quantity(self, idx: typing.Optional[int] = None)->bool: # This matches quantities excluding numbers. quantity_re: typing.Pattern = re.compile(r'^' + quantity_pat + r'$') - def is_valid_number_or_quantity(self, idx: typing.Optional[int] = None)->bool: + def is_valid_number_or_quantity(self)->bool: """ - Return False if this value is a list and idx is None. - Otherwise, return True if the first character is 0-9,_,-,. + Return True if the first character is 0-9,_,-,. and it is either a Python-compatible number or an enhanced quantity. """ - if self.is_list() and idx is None: - return False - - v: str = self.get_item(idx) - if not v.startswith(("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "+", "-", ".")): + # If we know the specific data type, delegate the test to that data type. + if self.data_type is not None: + if self.data_type == KgtkFormat.DataType.NUMBER: + return self.is_valid_number() + elif self.data_type == KgtkFormat.DataType.QUANTITY: + return self.is_valid_quantity() + else: + return False # Not a number or quantity. + + if not self._is_number_or_quantity(): return False - m: typing.Optional[typing.Match] = KgtkValue.number_or_quantity_re.match(v) + # We cannot cache the result of this test because it would interfere + # if we later determined the exact data type. We could work around + # this problem with more thought. + m: typing.Optional[typing.Match] = KgtkValue.number_or_quantity_re.match(self.value) return m is not None - def is_valid_number(self, idx: typing.Optional[int] = None)->bool: + def is_valid_number(self)->bool: """ - Return False if this value is a list and idx is None. Otherwise, return True if the first character is 0-9,_,-,. and it is a Python-compatible number (with optional limited enhancements). 
@@ -248,173 +292,215 @@ def is_valid_number(self, idx: typing.Optional[int] = None)->bool: 10.4 10.4e10 """ - if self.is_list() and idx is None: - return False + if self.data_type is not None: + if self.data_type != KgtkFormat.DataType.NUMBER: + return False + if self.valid is not None: + return self.valid - v: str = self.get_item(idx) - if not v.startswith(("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "+", "-", ".")): + if not self._is_number_or_quantity(): return False + # We don't know yet if this is a number. It could be a quantity. - m: typing.Optional[typing.Match] = KgtkValue.number_re.match(v) - return m is not None + m: typing.Optional[typing.Match] = KgtkValue.number_re.match(self.value) + if m is None: + return False + + # Now we can be certain that this is a number. + self.data_type = KgtkFormat.DataType.NUMBER + self.valid = True + return True - def is_valid_quantity(self, idx: typing.Optional[int] = None)->bool: + def is_valid_quantity(self)->bool: """ - Return False if this value is a list and idx is None. - Otherwise, return True if the first character is 0-9,_,-,. + Return True if the first character is 0-9,_,-,. and it is an enhanced quantity. """ - if self.is_list() and idx is None: - return False + if self.data_type is not None: + if self.data_type != KgtkFormat.DataType.QUANTITY: + return False + if self.valid is not None: + return self.valid - v: str = self.get_item(idx) - if not v.startswith(("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "+", "-", ".")): + if not self._is_number_or_quantity(): return False + # We don't know yet if this is a quantity. It could be a number. - m: typing.Optional[typing.Match] = KgtkValue.quantity_re.match(v) - return m is not None - + m: typing.Optional[typing.Match] = KgtkValue.quantity_re.match(self.value) + if m is None: + return False + + # Now we can be certain that this is a quantity. + self.data_type = KgtkFormat.DataType.QUANTITY + self.valid = True + return True - def is_string(self, idx: typing.Optional[int] = None)->bool: + def is_string(self)->bool: """ - Return False if this value is a list and idx is None. - Otherwise, return True if the first character is '"'. + Return True if the first character is '"'. Strings begin and end with double quote ("). Any internal double quotes must be escaped with backslash (\"). Triple-double quoted strings are not supported by KGTK File Vormat v2. """ - if self.is_list() and idx is None: - return False + if self.data_type is not None: + return self.data_type == KgtkFormat.DataType.STRING - v: str = self.get_item(idx) - return v.startswith('"') + if not self.value.startswith('"'): + return False + + # We are certain this is a string. We don't yet know if it is valid. + self.data_type = KgtkFormat.DataType.STRING + return True lax_string_re: typing.Pattern = re.compile(r'^".*"$') strict_string_re: typing.Pattern = re.compile(r'^"(?:[^"\\]|\\.)*"$') - def is_valid_string(self, idx: typing.Optional[int] = None)->bool: + def is_valid_string(self)->bool: """ - Return False if this value is a list and idx is None. - Otherwise, return True if the first character is '"', + Strict: return True if the first character is '"', the last character is '"', and any internal '"' characters are escaped by backslashes. 
""" - if self.is_list() and idx is None: - return False - - v: str = self.get_item(idx) - if not v.startswith('"'): + if not self.is_string(): return False + + if self.valid is not None: + return self.valid + m: typing.Optional[typing.Match] if self.options.allow_lax_strings: - m = KgtkValue.lax_string_re.match(v) + m = KgtkValue.lax_string_re.match(self.value) else: - m = KgtkValue.strict_string_re.match(v) - return m is not None + m = KgtkValue.strict_string_re.match(self.value) + if m is None: + return False + + # We are certain that this is a valid string. + self.valid = True + return True - def is_structured_literal(self, idx: typing.Optional[int] = None)->bool: + def is_structured_literal(self)->bool: """ - Return False if this value is a list and idx is None. - Otherwise, return True if the first character is ^@'!. + Return True if the first character is ^@'!. """ - if self.is_list() and idx is None: - return False - - v: str = self.get_item(idx) - return v.startswith(("^", "@", "'", "!")) + return self.value.startswith(("^", "@", "'", "!")) - def is_symbol(self, idx: typing.Optional[int] = None)->bool: + def is_symbol(self)->bool: """ - Return False if this value is a list and idx is None. - Otherwise, return True if not a number, string, nor structured literal. + Return True if not a number, string, nor structured literal. """ - if self.is_list() and idx is None: - return False + if self.data_type is not None: + return self.data_type == KgtkFormat.DataType.SYMBOL - return not (self.is_number_or_quantity(idx) or self.is_string(idx) or self.is_structured_literal(idx)) + if self.is_number_or_quantity() or self.is_string() or self.is_structured_literal() or self.is_boolean(): + return False + + # We are certain this is a symbol. We assume, for now that it is valid. + self.data_type = KgtkFormat.DataType.SYMBOL + self.valid = True + return True - def is_boolean(self, idx: typing.Optional[int] = None)->bool: + def is_valid_symbol(self)->bool: + # If it is a suymbol, then it is valid. + return self.is_symbol() + + def is_boolean(self)->bool: """ - Return False if this value is a list and idx is None. - Otherwise, return True if the value matches one of the special boolean symbols.. + return True if the value matches one of the special boolean symbols.. """ - if self.is_list() and idx is None: - return False + if self.data_type is not None: + return self.data_type == KgtkFormat.DataType.BOOLEAN - v: str = self.get_item(idx) - return v == KgtkFormat.TRUE_SYMBOL or v == KgtkFormat.FALSE_SYMBOL + if self.value != KgtkFormat.TRUE_SYMBOL and self.value != KgtkFormat.FALSE_SYMBOL: + return False + + # We are certain this is a valid boolean. + self.data_type = KgtkFormat.DataType.BOOLEAN + self.valid = True + return True + def is_valid_boolean(self)->bool: + # If it is a boolean, then it is valid. + return self.is_boolean() - def is_language_qualified_string(self, idx: typing.Optional[int] = None)->bool: + def is_language_qualified_string(self)->bool: """ - Return False if this value is a list and idx is None. - Otherwise, return True if the first character is ' + Return True if the first character is ' """ - if self.is_list() and idx is None: + if self.data_type is not None: + return self.data_type == KgtkFormat.DataType.LANGUAGE_QUALIFIED_STRING + + if not self.value.startswith("'"): return False - v: str = self.get_item(idx) - return v.startswith("'") + self.data_type = KgtkFormat.DataType.LANGUAGE_QUALIFIED_STRING + return True # Support two or three character language codes. 
Suports hyphenated codes # with country codes or dialect names after a language code. lax_language_qualified_string_re: typing.Pattern = re.compile(r"^(?P'.*')@(?P[a-zA-Z]{2,3}(?:-[a-zA-Z]+)?)$") strict_language_qualified_string_re: typing.Pattern = re.compile(r"^(?P'(?:[^'\\]|\\.)*')@(?P[a-zA-Z]{2,3}(?:-[a-zA-Z]+)?)$") - def is_valid_language_qualified_string(self, idx: typing.Optional[int] = None)->bool: - """Return False if this value is a list and idx is None. - Otherwise, return True if the value looks like a language-qualified string. + def is_valid_language_qualified_string(self)->bool: """ - if self.is_list() and idx is None: + Return True if the value looks like a language-qualified string. + """ + if not self.is_language_qualified_string(): return False - v: str = self.get_item(idx) - # print("checking %s" % v) + # print("checking %s" % self.value) m: typing.Optional[typing.Match] if self.options.allow_lax_lq_strings: - m = KgtkValue.lax_language_qualified_string_re.match(v) + m = KgtkValue.lax_language_qualified_string_re.match(self.value) else: - m = KgtkValue.strict_language_qualified_string_re.match(v) + m = KgtkValue.strict_language_qualified_string_re.match(self.value) if m is None: - # print("match failed for %s" % v) + # print("match failed for %s" % self.value) return False # Validate the language code: lang: str = m.group("lang").lower() # print("lang: %s" % lang) - return LanguageValidator.validate(lang, options=self.options) + if not LanguageValidator.validate(lang, options=self.options): + # print("language validation failed for %s" % self.value) + return False + + # We are certain that this is a valid language qualified string. + self.valid = True + return True - def is_location_coordinates(self, idx: typing.Optional[int] = None)->bool: + def is_location_coordinates(self)->bool: """ - Return False if this value is a list and idx is None. - Otherwise, return True if the first character is @ + Return True if the first character is @ """ - if self.is_list() and idx is None: + if self.data_type is not None: + return self.data_type == KgtkFormat.DataType.LOCATION_COORDINATES + + if not self.value.startswith("@"): return False - v: str = self.get_item(idx) - return v.startswith("@") + self.data_type = KgtkFormat.DataType.LOCATION_COORDINATES + return True #location_coordinates_re: typing.Pattern = re.compile(r"^@(?P[-+]?\d{3}\.\d{5})/(?P[-+]?\d{3}\.\d{5})$") degrees_pat: str = r'(?:[-+]?(?:\d+(?:\.\d*)?)|(?:\.\d+))' location_coordinates_re: typing.Pattern = re.compile(r'^@(?P{degrees})/(?P{degrees})$'.format(degrees=degrees_pat)) - def is_valid_location_coordinates(self, idx: typing.Optional[int] = None)->bool: + def is_valid_location_coordinates(self)->bool: """ Return False if this value is a list and idx is None. Otherwise, return True if the value looks like valid location coordinates. @043.26193/010.92708 """ - if self.is_list() and idx is None: + if not self.is_location_coordinates(): return False - v: str = self.get_item(idx) - m: typing.Optional[typing.Match] = KgtkValue.location_coordinates_re.match(v) + m: typing.Optional[typing.Match] = KgtkValue.location_coordinates_re.match(self.value) if m is None: return False @@ -436,18 +522,23 @@ def is_valid_location_coordinates(self, idx: typing.Optional[int] = None)->bool: except ValueError: return False + # We are certain that this is valid. 
+ self.valid = True return True - def is_date_and_times(self, idx: typing.Optional[int] = None)->bool: + def is_date_and_times(self)->bool: """ - Return False if this value is a list and idx is None. - Otherwise, return True if the first character is ^ + Return True if the first character is ^ """ - if self.is_list() and idx is None: + if self.data_type is not None: + return self.data_type == KgtkFormat.DataType.DATE_AND_TIMES + + if not self.value.startswith("^"): return False - v: str = self.get_item(idx) - return v.startswith("^") + # This is a date and times value. We do not yet know if it si valid. + self.data_type = KgtkFormat.DataType.DATE_AND_TIMES + return True # https://en.wikipedia.org/wiki/ISO_8601 # @@ -492,10 +583,9 @@ def is_date_and_times(self, idx: typing.Optional[int] = None)->bool: precision=precision_pat) lax_date_and_times_re: typing.Pattern = re.compile(r'^{date_and_times}$'.format(date_and_times=lax_date_and_times_pat)) - def is_valid_date_and_times(self, idx: typing.Optional[int] = None)->bool: + def is_valid_date_and_times(self)->bool: """ - Return False if this value is a list and idx is None. - Otherwise, return True if the value looks like valid date and times + Return True if the value looks like valid date and times literal based on ISO-8601. Valid date formats: @@ -541,11 +631,10 @@ def is_valid_date_and_times(self, idx: typing.Optional[int] = None)->bool: TODO: validate the calendar date, eg fail if 31-Apr-2020. """ - if self.is_list() and idx is None: + if not self.is_date_and_times(): return False - v: str = self.get_item(idx) - m: typing.Optional[typing.Match] = KgtkValue.lax_date_and_times_re.match(v) + m: typing.Optional[typing.Match] = KgtkValue.lax_date_and_times_re.match(self.value) if m is None: return False @@ -580,77 +669,42 @@ def is_valid_date_and_times(self, idx: typing.Optional[int] = None)->bool: if day == 0 and not self.options.allow_month_or_day_zero: return False # day 0 was disallowed. + # We are fairly certain that this is a valid date and times. + self.valid = True return True - def is_extension(self, idx: typing.Optional[int] = None)->bool: - """ - Return False if this value is a list and idx is None. - Otherwise, return True if the first character is ! - """ - if self.is_list() and idx is None: - return False - - v: str = self.get_item(idx) - return v.startswith("!") - - - def is_valid_literal(self, idx: typing.Optional[int] = None)->bool: + def is_extension(self)->bool: """ - Return False if this value is a list and idx is None. - Otherwise, return True if the value looks like a valid literal. + Return True if the first character is ! """ - if self.is_list() and idx is None: - return False - - if self.is_string(idx): - return self.is_valid_string(idx) - elif self.is_number_or_quantity(idx): - return self.is_valid_number_or_quantity(idx) - elif self.is_structured_literal(idx): - if self.is_language_qualified_string(idx): - return self.is_valid_language_qualified_string(idx) - elif self.is_location_coordinates(idx): - return self.is_valid_location_coordinates(idx) - elif self.is_date_and_times(idx): - return self.is_valid_date_and_times(idx) - elif self.is_extension(idx): - return False # no validation presently available. - else: - return False # Shouldn't get here. 
- else: - return False + if self.data_type is not None: + return self.data_type == KgtkFormat.DataType.EXTENSION - def is_valid_item(self, idx: typing.Optional[int] = None)->bool: - if self.is_list() and idx is None: + if not self.value.startswith("!"): return False - if self.is_empty(idx): - return True - elif self.is_valid_literal(idx): - return True - else: - return self.is_symbol(idx) # Should always be True + # This is an extension, but for now, assume that all extensions are invalid. + self.data_type = KgtkFormat.DataType.EXTENSION + self.valid = False + return True - def is_valid(self)->bool: - """ - Is this a valid KGTK cell value? If the value is a list, are all the - components valid? - """ - result: bool = True - kv: KgtkValue - for kv in self.get_values(): - result = result and kv.is_valid_item() - return result + def is_valid_extension(self)->bool: + # For now, all extensions are invalid. + return False - def describe(self, idx: typing.Optional[int] = None)->str: + def describe(self)->str: """ Return a string that describes the value. """ - if self.is_list() and idx is None: - result: str = "List (" + if self.is_list(): + result: str + if self.is_valid_list: + result = "List (" + else: + result = "Invalid List (" kv: KgtkValue first: bool = True - for kv in self.get_values(): + for kv in self.get_list(): if first: first = not first else: @@ -658,43 +712,43 @@ def describe(self, idx: typing.Optional[int] = None)->str: result += kv.describe() return result + ")" - if self.is_empty(idx): + if self.is_empty(): return "Empty" - elif self.is_string(idx): - if self.is_valid_string(idx): + elif self.is_string(): + if self.is_valid_string(): return "String" else: return "Invalid String" - elif self.is_number_or_quantity(idx): - if self.is_valid_number(idx): + elif self.is_number_or_quantity(): + if self.is_valid_number(): return "Number" - elif self.is_valid_quantity(idx): + elif self.is_valid_quantity(): return "Quantity" else: return "Invalid Number or Quantity" - elif self.is_structured_literal(idx): - if self.is_language_qualified_string(idx): - if self.is_valid_language_qualified_string(idx): - return "Language Qualified String" - else: - return "Invalid Language Qualified String" - elif self.is_location_coordinates(idx): - if self.is_valid_location_coordinates(idx): - return "Location Coordinates" - else: - return "Invalid Location Coordinates" - elif self.is_date_and_times(idx): - if self.is_valid_date_and_times(idx): - return "Date and Times" - else: - return "Invalid Date and Times" - elif self.is_extension(idx): - return "Extension (unvalidated)" + elif self.is_language_qualified_string(): + if self.is_valid_language_qualified_string(): + return "Language Qualified String" else: - return "Invalid Structured Literal" - else: + return "Invalid Language Qualified String" + elif self.is_location_coordinates(): + if self.is_valid_location_coordinates(): + return "Location Coordinates" + else: + return "Invalid Location Coordinates" + elif self.is_date_and_times(): + if self.is_valid_date_and_times(): + return "Date and Times" + else: + return "Invalid Date and Times" + elif self.is_extension(): + return "Extension (unvalidated)" + elif self.is_boolean(): + return "Boolean Symbol" + elif self.is_symbol(): return "Symbol" - + else: + return "Unknown" def main(): """ From a3d9dbd938eb0e1b6f55859fd72b36dd18000a52 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Tue, 5 May 2020 16:12:59 -0700 Subject: [PATCH 075/278] Even more refactoring of the value tests. 
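This patch reworks list handling so that each list item gets its own KgtkValue and the list as a whole is valid only if every item is. The split itself relies on a negative-lookbehind regex so an escaped separator stays inside its item. A small sketch, assuming backslash is the escape character (the pattern here is an illustration of the `split_list_re` idea, not necessarily KGTK's exact pattern):

    import re

    # Split a KGTK list cell on "|", but not on an escaped "\|",
    # using a negative lookbehind for the backslash:
    split_list_re = re.compile(r'(?<!\\)\|')

    print(split_list_re.split(r'red|dark\|light|blue'))
    # ['red', 'dark\\|light', 'blue']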
--- kgtk/join/kgtkvalue.py | 441 +++++++++++++++++++---------------------- 1 file changed, 206 insertions(+), 235 deletions(-) diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index e1fb06f27..9d2875309 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -23,71 +23,13 @@ class KgtkValue(KgtkFormat): # If this is a list, prepare a KgtkValue object for each item of the list. list_items: typing.Optional[typing.List['KgtkValue']] = None - def get_data_type(self)->KgtkFormat.DataType: - - if self.data_type is not None: - pass - - elif self.is_empty() or self.is_list(): - pass - - elif self.is_string() or self.is_language_qualified_string(): - pass - - elif self.is_number_or_quantity(): - # To determine whether this is a number or a quantity, we have - # to validate one of them. - if not self.is_valid_number(): - # If it isn't a valid number, assume it's a quantity. - self.data_type = KgtkFormat.DataType.QUANTITY - - elif self.is_location_coordinates(): - pass - - elif self.is_date_and_times(): - pass - - elif self.is_extension(): - pass - - elif self.is_boolean() or self.is_symbol(): - pass - - if self.data_type is not None: - return self.data_type - - # Shouldn't get here. - raise ValueError("Unknown data type for '%s'" % self.value) - def is_valid(self)->bool: - dt: KgtkFormat.DataType = self.get_data_type() - if dt == KgtkFormat.DataType.EMPTY: - return self.is_valid_empty() - elif dt == KgtkFormat.DataType.LIST: - return self.is_valid_list() - elif dt == KgtkFormat.DataType.NUMBER: - return self.is_valid_number() - elif dt == KgtkFormat.DataType.QUANTITY: - return self.is_valid_quantity() - elif dt == KgtkFormat.DataType.STRING: - return self.is_valid_string() - elif dt == KgtkFormat.DataType.LANGUAGE_QUALIFIED_STRING: - return self.is_valid_language_qualified_string() - elif dt == KgtkFormat.DataType.LOCATION_COORDINATES: - return self.is_valid_location_coordinates() - elif dt == KgtkFormat.DataType.DATE_AND_TIMES: - return self.is_valid_date_and_times() - elif dt == KgtkFormat.DataType.EXTENSION: - return self.is_valid_extension() - elif dt == KgtkFormat.DataType.BOOLEAN: - return self.is_valid_boolean() - elif dt == KgtkFormat.DataType.SYMBOL: - return self.is_valid_symbol() + if self.valid is not None: + return self.valid else: - raise ValueError("Unrecognized DataType.") + return self.validate() - - def is_empty(self)->bool: + def is_empty(self, validate: bool = False)->bool: if self.data_type is not None: return self.data_type == KgtkFormat.DataType.EMPTY @@ -99,41 +41,39 @@ def is_empty(self)->bool: self.valid = True return True - def is_valid_empty(self)->bool: - # If it is empty, it is validly so. - return self.is_empty() - split_list_re: typing.Pattern = re.compile(r"(?typing.List['KgtkValue']: if self.list_items is not None: return self.list_items + # Return an empty list if this is not a list. 
self.list_items: typing.List['KgtkValue'] = [ ] - value: str - for value in KgtkValue.split_list_re.split(self.value): - self.list_items.append(KgtkValue(value, options=self.options)) + values: typing.List[str] = KgtkValue.split_list_re.split(self.value) + if len(values) > 1: + # Populate list_items with a KgtkValue for each item in the list: + item_value: str + for item_value in values: + self.list_items.append(KgtkValue(item_value, options=self.options)) return self.list_items - def is_list(self)->bool: - if self.data_type is not None: - return self.data_type == KgtkFormat.DataType.LIST - - if len(self.get_list()) == 1: - return False - - # We aare certain that this is a list, although we haven't checked validity. - self.data_type = KgtkFormat.DataType.LIST - return True - - - def is_valid_list(self)->bool: - if not self.is_list(): - return False + def is_list(self, validate: bool = False)->bool: + # Must test for list before anything else (except empty)! + if self.data_type is None: + if len(self.get_list()) == 0: + return False + # We are certain that this is a list, although we haven't checked validity. + self.data_type = KgtkFormat.DataType.LIST + else: + if self.data_type != KgtkFormat.DataType.LIST: + return False + if not validate: + return True if self.valid is not None: return self.valid - + + # Validate the list. item: 'KgtkValue' for item in self.get_list(): if not item.is_valid(): @@ -148,15 +88,6 @@ def is_valid_list(self)->bool: def _is_number_or_quantity(self)->bool: return self.value.startswith(("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "+", "-", ".")) - def is_number_or_quantity(self)->bool: - """ - Otherwise, return True if the first character is 0-9,+,-,. . - """ - if self.data_type is not None: - return self.data_type == KgtkFormat.DataType.NUMBER or self.data_type == KgtkFormat.DataType.QUANTITY - - return self._is_number_or_quantity() - # The following lexical analysis is based on: # https://docs.python.org/3/reference/lexical_analysis.html @@ -248,7 +179,7 @@ def is_number_or_quantity(self)->bool: # This matches quantities excluding numbers. quantity_re: typing.Pattern = re.compile(r'^' + quantity_pat + r'$') - def is_valid_number_or_quantity(self)->bool: + def is_number_or_quantity(self, validate: bool=False)->bool: """ Return True if the first character is 0-9,_,-,. and it is either a Python-compatible number or an enhanced @@ -257,15 +188,22 @@ def is_valid_number_or_quantity(self)->bool: # If we know the specific data type, delegate the test to that data type. if self.data_type is not None: if self.data_type == KgtkFormat.DataType.NUMBER: - return self.is_valid_number() + if not validate: + return True + return self.is_number(validate=validate) elif self.data_type == KgtkFormat.DataType.QUANTITY: - return self.is_valid_quantity() + if not validate: + return True + return self.is_quantity(validate=validate) else: return False # Not a number or quantity. if not self._is_number_or_quantity(): return False + if not validate: + return True + # We cannot cache the result of this test because it would interfere # if we later determined the exact data type. We could work around # this problem with more thought. @@ -273,7 +211,7 @@ def is_valid_number_or_quantity(self)->bool: return m is not None - def is_valid_number(self)->bool: + def is_number(self, validate: bool=False)->bool: """ Otherwise, return True if the first character is 0-9,_,-,. and it is a Python-compatible number (with optional limited enhancements). 
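The NUMBER/QUANTITY distinction in the hunks above rests on a layered grammar: a Python-style numeric literal, an optional `[low,high]` tolerance, and optional units (SI symbols or a Wikidata Q-node). A much-simplified, self-contained sketch of that grammar; the unit alternation here is a tiny illustrative subset, not the real `si_unit_pat`:

    import re

    # Illustrative grammar: number + optional [low,high] tolerance + optional unit.
    number = r'[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?'
    tolerance = r'\[{n},{n}\]'.format(n=number)
    unit = r'(?:m|kg|s|A|K|mol|cd|Q[1-9][0-9]*)'  # tiny subset of the real unit list
    quantity_re = re.compile(r'^{n}(?:{t})?{u}?$'.format(n=number, t=tolerance, u=unit))

    for cell in ('10.4e10', '12[11,13]m', '5Q11573', 'abc'):
        print(cell, bool(quantity_re.match(cell)))
    # 10.4e10 True, 12[11,13]m True, 5Q11573 True, abc False

A match with any tolerance or unit component is a quantity; a bare numeric match is a number, which is exactly the decision `is_number_or_quantity` makes below.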
@@ -295,6 +233,8 @@ def is_valid_number(self)->bool: if self.data_type is not None: if self.data_type != KgtkFormat.DataType.NUMBER: return False + if not validate: + return True if self.valid is not None: return self.valid @@ -312,7 +252,7 @@ def is_valid_number(self)->bool: return True - def is_valid_quantity(self)->bool: + def is_quantity(self, validate: bool=False)->bool: """ Return True if the first character is 0-9,_,-,. and it is an enhanced quantity. @@ -320,6 +260,8 @@ def is_valid_quantity(self)->bool: if self.data_type is not None: if self.data_type != KgtkFormat.DataType.QUANTITY: return False + if not validate: + return True if self.valid is not None: return self.valid @@ -336,7 +278,10 @@ def is_valid_quantity(self)->bool: self.valid = True return True - def is_string(self)->bool: + lax_string_re: typing.Pattern = re.compile(r'^".*"$') + strict_string_re: typing.Pattern = re.compile(r'^"(?:[^"\\]|\\.)*"$') + + def is_string(self, validate: bool = False)->bool: """ Return True if the first character is '"'. @@ -345,31 +290,21 @@ def is_string(self)->bool: strings are not supported by KGTK File Vormat v2. """ - if self.data_type is not None: - return self.data_type == KgtkFormat.DataType.STRING - - if not self.value.startswith('"'): - return False - - # We are certain this is a string. We don't yet know if it is valid. - self.data_type = KgtkFormat.DataType.STRING - return True - - lax_string_re: typing.Pattern = re.compile(r'^".*"$') - strict_string_re: typing.Pattern = re.compile(r'^"(?:[^"\\]|\\.)*"$') - - def is_valid_string(self)->bool: - """ - Strict: return True if the first character is '"', - the last character is '"', and any internal '"' characters are - escaped by backslashes. - """ - if not self.is_string(): - return False + if self.data_type is None: + if not self.value.startswith('"'): + return False + # We are certain this is a string. We don't yet know if it is valid. + self.data_type = KgtkFormat.DataType.STRING + else: + if self.data_type != KgtkFormat.DataType.STRING: + return False + if not validate: + return True if self.valid is not None: return self.valid - + + # Validate the string: m: typing.Optional[typing.Match] if self.options.allow_lax_strings: m = KgtkValue.lax_string_re.match(self.value) @@ -388,32 +323,30 @@ def is_structured_literal(self)->bool: """ return self.value.startswith(("^", "@", "'", "!")) - def is_symbol(self)->bool: + def is_symbol(self, validate: bool = False)->bool: """ Return True if not a number, string, nor structured literal. """ if self.data_type is not None: return self.data_type == KgtkFormat.DataType.SYMBOL + # Is this a symbol? It is, if it is not something else. if self.is_number_or_quantity() or self.is_string() or self.is_structured_literal() or self.is_boolean(): return False - # We are certain this is a symbol. We assume, for now that it is valid. + # We are certain this is a symbol. We assume that it is valid. self.data_type = KgtkFormat.DataType.SYMBOL self.valid = True return True - def is_valid_symbol(self)->bool: - # If it is a suymbol, then it is valid. - return self.is_symbol() - - def is_boolean(self)->bool: + def is_boolean(self, validate: bool = False)->bool: """ return True if the value matches one of the special boolean symbols.. """ if self.data_type is not None: return self.data_type == KgtkFormat.DataType.BOOLEAN + # Is this a boolean? 
if self.value != KgtkFormat.TRUE_SYMBOL and self.value != KgtkFormat.FALSE_SYMBOL: return False @@ -422,35 +355,30 @@ def is_boolean(self)->bool: self.valid = True return True - def is_valid_boolean(self)->bool: - # If it is a boolean, then it is valid. - return self.is_boolean() - - def is_language_qualified_string(self)->bool: - """ - Return True if the first character is ' - """ - if self.data_type is not None: - return self.data_type == KgtkFormat.DataType.LANGUAGE_QUALIFIED_STRING - - if not self.value.startswith("'"): - return False - - self.data_type = KgtkFormat.DataType.LANGUAGE_QUALIFIED_STRING - return True - # Support two or three character language codes. Suports hyphenated codes # with country codes or dialect names after a language code. lax_language_qualified_string_re: typing.Pattern = re.compile(r"^(?P'.*')@(?P[a-zA-Z]{2,3}(?:-[a-zA-Z]+)?)$") strict_language_qualified_string_re: typing.Pattern = re.compile(r"^(?P'(?:[^'\\]|\\.)*')@(?P[a-zA-Z]{2,3}(?:-[a-zA-Z]+)?)$") - def is_valid_language_qualified_string(self)->bool: + def is_language_qualified_string(self, validate: bool=False)->bool: """ Return True if the value looks like a language-qualified string. """ - if not self.is_language_qualified_string(): - return False + if self.data_type is None: + if not self.value.startswith("'"): + return False + # We are certain that this is a language qualified string, although we haven't checked validity. + self.data_type = KgtkFormat.DataType.LANGUAGE_QUALIFIED_STRING + else: + if self.data_type != KgtkFormat.DataType.LANGUAGE_QUALIFIED_STRING: + return False + if not validate: + return True + if self.valid is not None: + return self.valid + + # Validate the language qualified string. # print("checking %s" % self.value) m: typing.Optional[typing.Match] if self.options.allow_lax_lq_strings: @@ -473,33 +401,32 @@ def is_valid_language_qualified_string(self)->bool: self.valid = True return True - def is_location_coordinates(self)->bool: - """ - Return True if the first character is @ - """ - if self.data_type is not None: - return self.data_type == KgtkFormat.DataType.LOCATION_COORDINATES - - if not self.value.startswith("@"): - return False - - self.data_type = KgtkFormat.DataType.LOCATION_COORDINATES - return True - #location_coordinates_re: typing.Pattern = re.compile(r"^@(?P[-+]?\d{3}\.\d{5})/(?P[-+]?\d{3}\.\d{5})$") degrees_pat: str = r'(?:[-+]?(?:\d+(?:\.\d*)?)|(?:\.\d+))' location_coordinates_re: typing.Pattern = re.compile(r'^@(?P{degrees})/(?P{degrees})$'.format(degrees=degrees_pat)) - def is_valid_location_coordinates(self)->bool: + def is_location_coordinates(self, validate: bool=False)->bool: """ Return False if this value is a list and idx is None. Otherwise, return True if the value looks like valid location coordinates. @043.26193/010.92708 """ - if not self.is_location_coordinates(): - return False + if self.data_type is None: + if not self.value.startswith("@"): + return False + # We are certain that this is location coordinates, although we haven't checked validity. 
+ self.data_type = KgtkFormat.DataType.LOCATION_COORDINATES + else: + if self.data_type != KgtkFormat.DataType.LOCATION_COORDINATES: + return False + if not validate: + return True + if self.valid is not None: + return self.valid + + # Validate the location coordinates: m: typing.Optional[typing.Match] = KgtkValue.location_coordinates_re.match(self.value) if m is None: return False @@ -526,20 +453,6 @@ def is_valid_location_coordinates(self)->bool: self.valid = True return True - def is_date_and_times(self)->bool: - """ - Return True if the first character is ^ - """ - if self.data_type is not None: - return self.data_type == KgtkFormat.DataType.DATE_AND_TIMES - - if not self.value.startswith("^"): - return False - - # This is a date and times value. We do not yet know if it si valid. - self.data_type = KgtkFormat.DataType.DATE_AND_TIMES - return True - # https://en.wikipedia.org/wiki/ISO_8601 # # The "lax" patterns allow month 00 and day 00, which are excluded by ISO 8601. @@ -583,7 +496,7 @@ def is_date_and_times(self)->bool: precision=precision_pat) lax_date_and_times_re: typing.Pattern = re.compile(r'^{date_and_times}$'.format(date_and_times=lax_date_and_times_pat)) - def is_valid_date_and_times(self)->bool: + def is_date_and_times(self, validate: bool=False)->bool: """ Return True if the value looks like valid date and times literal based on ISO-8601. @@ -631,9 +544,21 @@ def is_valid_date_and_times(self)->bool: TODO: validate the calendar date, eg fail if 31-Apr-2020. """ - if not self.is_date_and_times(): - return False + if self.data_type is None: + if not self.value.startswith("^"): + return False + # We are certain that this is location coordinates, although we haven't checked validity. + self.data_type = KgtkFormat.DataType.DATE_AND_TIMES + else: + if self.data_type != KgtkFormat.DataType.DATE_AND_TIMES: + return False + if not validate: + return True + if self.valid is not None: + return self.valid + + # Validate the date and times: m: typing.Optional[typing.Match] = KgtkValue.lax_date_and_times_re.match(self.value) if m is None: return False @@ -673,35 +598,99 @@ def is_valid_date_and_times(self)->bool: self.valid = True return True - def is_extension(self)->bool: + def is_extension(self, validate=False)->bool: """ Return True if the first character is ! """ if self.data_type is not None: - return self.data_type == KgtkFormat.DataType.EXTENSION + if not self.value.startswith("!"): + return False + # This is an extension, but for now, assume that all extensions are invalid. + self.data_type = KgtkFormat.DataType.EXTENSION + self.valid = False + else: + if self.data_type != KgtkFormat.DataType.EXTENSION: + return False - if not self.value.startswith("!"): - return False + if not validate: + return True + if self.valid is not None: + return self.valid + raise ValueError("Inconsistent extension state.") - # This is an extension, but for now, assume that all extensions are invalid. - self.data_type = KgtkFormat.DataType.EXTENSION - self.valid = False - return True + def classify(self)->KgtkFormat.DataType: + if self.data_type is not None: + return self.data_type + + # Must test for list before anything else (except empty)! + if self.is_empty() or self.is_list(): + pass + + elif self.is_string() or self.is_language_qualified_string(): + pass + + elif self.is_number_or_quantity(): + # To determine whether this is a number or a quantity, we have + # to validate one of them. + if not self.is_number(): + # If it isn't a valid number, assume it's a quantity. 
+            self.data_type = KgtkFormat.DataType.QUANTITY
+
+        elif self.is_location_coordinates():
+            pass
+
+        elif self.is_date_and_times():
+            pass
+
+        elif self.is_extension():
+            pass
+
+        elif self.is_boolean() or self.is_symbol():
+            pass
+
+        if self.data_type is not None:
+            return self.data_type
 
-    def is_valid_extension(self)->bool:
-        # For now, all extensions are invalid.
-        return False
+        # Shouldn't get here.
+        raise ValueError("Unknown data type for '%s'" % self.value)
+
+    def validate(self)->bool:
+        dt: KgtkFormat.DataType = self.classify()
+        if dt == KgtkFormat.DataType.EMPTY:
+            return self.is_empty(validate=True)
+        elif dt == KgtkFormat.DataType.LIST:
+            return self.is_list(validate=True)
+        elif dt == KgtkFormat.DataType.NUMBER:
+            return self.is_number(validate=True)
+        elif dt == KgtkFormat.DataType.QUANTITY:
+            return self.is_quantity(validate=True)
+        elif dt == KgtkFormat.DataType.STRING:
+            return self.is_string(validate=True)
+        elif dt == KgtkFormat.DataType.LANGUAGE_QUALIFIED_STRING:
+            return self.is_language_qualified_string(validate=True)
+        elif dt == KgtkFormat.DataType.LOCATION_COORDINATES:
+            return self.is_location_coordinates(validate=True)
+        elif dt == KgtkFormat.DataType.DATE_AND_TIMES:
+            return self.is_date_and_times(validate=True)
+        elif dt == KgtkFormat.DataType.EXTENSION:
+            return self.is_extension(validate=True)
+        elif dt == KgtkFormat.DataType.BOOLEAN:
+            return self.is_boolean(validate=True)
+        elif dt == KgtkFormat.DataType.SYMBOL:
+            return self.is_symbol(validate=True)
+        else:
+            raise ValueError("Unrecognized DataType.")
+
     def describe(self)->str:
         """
         Return a string that describes the value.
         """
-        if self.is_list():
-            result: str
-            if self.is_valid_list():
-                result = "List ("
-            else:
-                result = "Invalid List ("
+        dt: KgtkFormat.DataType = self.classify()
+        if dt == KgtkFormat.DataType.EMPTY:
+            return "Empty" if self.is_empty(validate=True) else "Invalid Empty"
+        elif dt == KgtkFormat.DataType.LIST:
+            result: str = "List (" if self.is_list(validate=True) else "Invalid List ("
             kv: KgtkValue
             first: bool = True
             for kv in self.get_list():
@@ -711,42 +700,24 @@ def describe(self)->str:
                 result += KgtkFormat.LIST_SEPARATOR
             result += kv.describe()
             return result + ")"
-
-        if self.is_empty():
-            return "Empty"
-        elif self.is_string():
-            if self.is_valid_string():
-                return "String"
-            else:
-                return "Invalid String"
-        elif self.is_number_or_quantity():
-            if self.is_valid_number():
-                return "Number"
-            elif self.is_valid_quantity():
-                return "Quantity"
-            else:
-                return "Invalid Number or Quantity"
-        elif self.is_language_qualified_string():
-            if self.is_valid_language_qualified_string():
-                return "Language Qualified String"
-            else:
-                return "Invalid Language Qualified String"
-        elif self.is_location_coordinates():
-            if self.is_valid_location_coordinates():
-                return "Location Coordinates"
-            else:
-                return "Invalid Location Coordinates"
-        elif self.is_date_and_times():
-            if self.is_valid_date_and_times():
-                return "Date and Times"
-            else:
-                return "Invalid Date and Times"
-        elif self.is_extension():
-            return "Extension (unvalidated)"
-        elif self.is_boolean():
-            return "Boolean Symbol"
-        elif self.is_symbol():
-            return "Symbol"
+        elif dt == KgtkFormat.DataType.NUMBER:
+            return "Number" if self.is_number(validate=True) else "Invalid Number"
+        elif dt == KgtkFormat.DataType.QUANTITY:
+            return "Quantity" if self.is_quantity(validate=True) else "Invalid Quantity"
+        elif dt == KgtkFormat.DataType.STRING:
+            return "String" if self.is_string(validate=True) else "Invalid String"
+        elif dt ==
KgtkFormat.DataType.LANGUAGE_QUALIFIED_STRING: + return "Language Qualified String" if self.is_language_qualified_string(validate=True) else "Invalid Language Qualified String" + elif dt == KgtkFormat.DataType.LOCATION_COORDINATES: + return "Location Coordinates" if self.is_location_coordinates(validate=True) else "Invalid Location Coordinates" + elif dt == KgtkFormat.DataType.DATE_AND_TIMES: + return "Date and Times" if self.is_date_and_times(validate=True) else "Invalid Date and Times" + elif dt == KgtkFormat.DataType.EXTENSION: + return "Extension" if self.is_extension(validate=True) else "Invalid Extension" + elif dt == KgtkFormat.DataType.BOOLEAN: + return "Boolean" if self.is_boolean(validate=True) else "Invalid Boolean" + elif dt == KgtkFormat.DataType.SYMBOL: + return "Symbol" if self.is_symbol(validate=True) else "Invalid Symbol" else: return "Unknown" From d7d2f8f410f7a8d72b06d5ec605cbcc88f3ff2d4 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Tue, 5 May 2020 16:27:59 -0700 Subject: [PATCH 076/278] Additional documentation. Improve the use of the cache. --- kgtk/join/kgtkvalue.py | 47 ++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index 9d2875309..a5cb77d44 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -17,19 +17,25 @@ class KgtkValue(KgtkFormat): value: str = attr.ib(validator=attr.validators.instance_of(str)) options: KgtkValueOptions = attr.ib(validator=attr.validators.instance_of(KgtkValueOptions), default=DEFAULT_KGTK_VALUE_OPTIONS) + # Cache some properties of the value that would be expensive to + # continuously recompute. The class is not frozen because we have these + # cache members. data_type: typing.Optional[KgtkFormat.DataType] = None valid: typing.Optional[bool] = None - # If this is a list, prepare a KgtkValue object for each item of the list. + # If this is a list, cache a KgtkValue object for each item of the list. list_items: typing.Optional[typing.List['KgtkValue']] = None def is_valid(self)->bool: + # Is this a valid whatever it is? if self.valid is not None: return self.valid else: return self.validate() def is_empty(self, validate: bool = False)->bool: + # Is this an empty item? If so, assume it is valid and ignore the + # validate parameter. if self.data_type is not None: return self.data_type == KgtkFormat.DataType.EMPTY @@ -43,7 +49,9 @@ def is_empty(self, validate: bool = False)->bool: split_list_re: typing.Pattern = re.compile(r"(?typing.List['KgtkValue']: + def get_list_items(self)->typing.List['KgtkValue']: + # If this is a KGTK List, return a list of KGTK values representing + # the items in the list. If this is not a KGTK List, return an empty list. if self.list_items is not None: return self.list_items @@ -60,7 +68,7 @@ def get_list(self)->typing.List['KgtkValue']: def is_list(self, validate: bool = False)->bool: # Must test for list before anything else (except empty)! if self.data_type is None: - if len(self.get_list()) == 0: + if len(self.get_list_items()) == 0: return False # We are certain that this is a list, although we haven't checked validity. self.data_type = KgtkFormat.DataType.LIST @@ -75,7 +83,7 @@ def is_list(self, validate: bool = False)->bool: # Validate the list. item: 'KgtkValue' - for item in self.get_list(): + for item in self.get_list_items(): if not item.is_valid(): # The list is invalid if any item in the list is invalid. 
self.valid = False @@ -209,7 +217,6 @@ def is_number_or_quantity(self, validate: bool=False)->bool: # this problem with more thought. m: typing.Optional[typing.Match] = KgtkValue.number_or_quantity_re.match(self.value) return m is not None - def is_number(self, validate: bool=False)->bool: """ @@ -325,7 +332,9 @@ def is_structured_literal(self)->bool: def is_symbol(self, validate: bool = False)->bool: """ - Return True if not a number, string, nor structured literal. + Return True if not a number, string, nor structured literal, nor boolean. + + The validate parameter is ignored. """ if self.data_type is not None: return self.data_type == KgtkFormat.DataType.SYMBOL @@ -341,7 +350,9 @@ def is_symbol(self, validate: bool = False)->bool: def is_boolean(self, validate: bool = False)->bool: """ - return True if the value matches one of the special boolean symbols.. + Return True if the value matches one of the special boolean symbols. + + The validate parameter is ignored. """ if self.data_type is not None: return self.data_type == KgtkFormat.DataType.BOOLEAN @@ -599,8 +610,11 @@ def is_date_and_times(self, validate: bool=False)->bool: return True def is_extension(self, validate=False)->bool: - """ - Return True if the first character is ! + """Return True if the first character is ! + + Although we refer to the validate parameter in the code below, we + force self.valid to False. + """ if self.data_type is not None: if not self.value.startswith("!"): @@ -619,7 +633,9 @@ def is_extension(self, validate=False)->bool: raise ValueError("Inconsistent extension state.") def classify(self)->KgtkFormat.DataType: + # Classify this KgtkValue into a KgtkDataType. if self.data_type is not None: + # Return the cached value. return self.data_type # Must test for list before anything else (except empty)! @@ -655,7 +671,16 @@ def classify(self)->KgtkFormat.DataType: raise ValueError("Unknown data type for '%s'" % self.value) def validate(self)->bool: + # Validate this KgtkValue. + + # Start by classifying the KgtkValue. dt: KgtkFormat.DataType = self.classify() + + # If the valid flag has already been cached, return that. + if self.valid is not None: + return self.valid + + # Validate the value. if dt == KgtkFormat.DataType.EMPTY: return self.is_empty(validate=True) elif dt == KgtkFormat.DataType.LIST: @@ -684,7 +709,7 @@ def validate(self)->bool: def describe(self)->str: """ - Return a string that describes the value. + Return a string that describes this KGTK Value. """ dt: KgtkFormat.DataType = self.classify() if dt == KgtkFormat.DataType.EMPTY: @@ -693,7 +718,7 @@ def describe(self)->str: result: str = "List (" if self.is_list(validate=True) else "Invalid List (" kv: KgtkValue first: bool = True - for kv in self.get_list(): + for kv in self.get_list_items(): if first: first = not first else: From 6177f4987c42c35d126378bcc3e72edbfbc5287d Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Tue, 5 May 2020 16:48:30 -0700 Subject: [PATCH 077/278] Extract language-qualified string components. --- kgtk/join/kgtkvalue.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index a5cb77d44..b80bd5ad3 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -26,6 +26,11 @@ class KgtkValue(KgtkFormat): # If this is a list, cache a KgtkValue object for each item of the list. 
     list_items: typing.Optional[typing.List['KgtkValue']] = None
 
+    # Offer the components of a language-qualified string:
+    string: typing.Optional[str] = None
+    lang: typing.Optional[str] = None
+    suffix: typing.Optional[str] = None # Includes the leading dash.
+
     def is_valid(self)->bool:
         # Is this a valid whatever it is?
         if self.valid is not None:
@@ -367,14 +372,19 @@ def is_boolean(self, validate: bool = False)->bool:
         return True
 
     # Support two or three character language codes. Supports hyphenated codes
-    # with country codes or dialect names after a language code.
-    lax_language_qualified_string_re: typing.Pattern = re.compile(r"^(?P<string>'.*')@(?P<lang>[a-zA-Z]{2,3}(?:-[a-zA-Z]+)?)$")
-    strict_language_qualified_string_re: typing.Pattern = re.compile(r"^(?P<string>'(?:[^'\\]|\\.)*')@(?P<lang>[a-zA-Z]{2,3}(?:-[a-zA-Z]+)?)$")
+    # with a country code or dialect name suffix after the language code.
+    lax_language_qualified_string_re: typing.Pattern = re.compile(r"^(?P<string>'.*')@(?P<lang>[a-zA-Z]{2,3}(?P<suffix>-[a-zA-Z]+)?)$")
+    strict_language_qualified_string_re: typing.Pattern = re.compile(r"^(?P<string>'(?:[^'\\]|\\.)*')@(?P<lang_suffix>(?P<lang>[a-zA-Z]{2,3})(?P<suffix>-[a-zA-Z]+)?)$")
 
     def is_language_qualified_string(self, validate: bool=False)->bool:
         """
         Return True if the value looks like a language-qualified string.
         """
+        # Clear the cached components of the language qualified string:
+        self.string = None
+        self.lang = None
+        self.suffix = None
+
         if self.data_type is None:
             if not self.value.startswith("'"):
                 return False
@@ -400,11 +410,17 @@ def is_language_qualified_string(self, validate: bool=False)->bool:
             # print("match failed for %s" % self.value)
             return False
 
-        # Validate the language code:
-        lang: str = m.group("lang").lower()
-        # print("lang: %s" % lang)
+        # Extract the string, lang, and optional suffix components:
+        self.string = m.group("string")
+        self.lang = m.group("lang")
+        self.suffix = m.group("suffix")
 
-        if not LanguageValidator.validate(lang, options=self.options):
+        # Extract the combined lang and suffix for use by the LanguageValidator.
+        lang_suffix: str = m.group("lang_suffix")
+        # print("lang: %s" % lang_suffix)
+
+        # Validate the language code:
+        if not LanguageValidator.validate(lang_suffix.lower(), options=self.options):
             # print("language validation failed for %s" % self.value)
             return False
 
@@ -732,7 +748,7 @@ def describe(self)->str:
         elif dt == KgtkFormat.DataType.STRING:
             return "String" if self.is_string(validate=True) else "Invalid String"
         elif dt == KgtkFormat.DataType.LANGUAGE_QUALIFIED_STRING:
-            return "Language Qualified String" if self.is_language_qualified_string(validate=True) else "Invalid Language Qualified String"
+            return "Language Qualified String (%s)" % self.lang if self.is_language_qualified_string(validate=True) else "Invalid Language Qualified String"
         elif dt == KgtkFormat.DataType.LOCATION_COORDINATES:
             return "Location Coordinates" if self.is_location_coordinates(validate=True) else "Invalid Location Coordinates"
         elif dt == KgtkFormat.DataType.DATE_AND_TIMES:
 
From 640af13475393019fd708f5fc73f7d022beffef7 Mon Sep 17 00:00:00 2001
From: Craig Milo Rogers
Date: Tue, 5 May 2020 18:07:59 -0700
Subject: [PATCH 078/278] Offer selected components of various KGTK data
 types.
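Patch 077 above begins exposing matched components through named capture groups. The general technique, shown with an illustrative pattern that only approximates the ones in the diff:

    import re

    # Illustrative pattern only; the diff's own regexes differ in detail.
    lq_re = re.compile(r"^'(?P<contents>(?:[^'\\]|\\.)*)'@(?P<lang>[a-zA-Z]{2,3})(?P<suffix>-[a-zA-Z]+)?$")

    m = lq_re.match("'kilowatt'@en-GB")
    if m is not None:
        print(m.group("contents"), m.group("lang"), m.group("suffix"))
        # kilowatt en -GB

Because optional groups like `suffix` yield `None` when absent, the caller can cache the components directly on the instance without extra presence checks, which is what the patches below do.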
---
 kgtk/join/kgtkvalue.py | 242 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 201 insertions(+), 41 deletions(-)

diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py
index b80bd5ad3..574f68da0 100644
--- a/kgtk/join/kgtkvalue.py
+++ b/kgtk/join/kgtkvalue.py
@@ -26,11 +26,32 @@ class KgtkValue(KgtkFormat):
     # If this is a list, cache a KgtkValue object for each item of the list.
     list_items: typing.Optional[typing.List['KgtkValue']] = None
 
-    # Offer the components of a language-qualified string:
-    string: typing.Optional[str] = None
+    # Offer the components of a string or language-qualified string:
+    contents: typing.Optional[str] = None # String contents without the enclosing quotes
     lang: typing.Optional[str] = None
     suffix: typing.Optional[str] = None # Includes the leading dash.
 
+    # Offer the components of a number or quantity:
+    number: typing.Optional[str] = None # Note: not converted to int or float
+    low_tolerance: typing.Optional[str] = None # Note: not converted to int or float
+    high_tolerance: typing.Optional[str] = None # Note: not converted to int or float
+    si_units: typing.Optional[str] = None
+    wikidata_node: typing.Optional[str] = None
+
+    # Offer the components of a location coordinates:
+    latstr: typing.Optional[str] = None
+    lat: typing.Optional[float] = None
+    lonstr: typing.Optional[str] = None
+    lon: typing.Optional[float] = None
+
+    # Offer the components of a date and times:
+    yearstr: typing.Optional[str] = None # Note: not converted to int
+    monthstr: typing.Optional[str] = None # Note: not converted to int
+    daystr: typing.Optional[str] = None # Note: not converted to int
+    hourstr: typing.Optional[str] = None # Note: not converted to int or float
+    minutesstr: typing.Optional[str] = None # Note: not converted to int or float
+    secondsstr: typing.Optional[str] = None # Note: not converted to int or float
+
     def is_valid(self)->bool:
         # Is this a valid whatever it is?
         if self.valid is not None:
@@ -153,8 +174,11 @@ def _is_number_or_quantity(self)->bool:
                                                             floatnumber=floatnumber_pat,
                                                             imagnumber=imagnumber_pat)
 
+    # Numeric literals with component labeling:
+    number_pat: str = r'(?P<number>{numeric})'.format(numeric=numeric_pat)
+
     # Tolerances
-    tolerance_pat: str = r'(?:\[{numeric},{numeric}\])'.format(numeric=numeric_pat)
+    tolerance_pat: str = r'(?:\[(?P<low_tolerance>{numeric}),(?P<high_tolerance>{numeric})\])'.format(numeric=numeric_pat)
 
     # SI units taken from:
     # http://www.csun.edu/~vceed002/ref/measurement/units/units.pdf
@@ -163,12 +187,12 @@ def _is_number_or_quantity(self)->bool:
     si_unit_pat: str = r'(?:m|kg|s|C|K|mol|cd|F|M|A|N|ohms|V|J|Hz|lx|H|Wb|V|W|Pa)'
     si_power_pat: str = r'(?:-1|2|3)' # Might need more.
     si_combiner_pat: str = r'[./]'
-    si_pat: str = r'(?:{si_unit}{si_power}?(?:{si_combiner}{si_unit}{si_power}?)*)'.format(si_unit=si_unit_pat,
+    si_pat: str = r'(?P<si_units>{si_unit}{si_power}?(?:{si_combiner}{si_unit}{si_power}?)*)'.format(si_unit=si_unit_pat,
                                                                                            si_combiner=si_combiner_pat,
                                                                                            si_power=si_power_pat)
     # Wikidata nodes (for units):
     nonzero_digit_pat: str = r'[1-9]'
-    wikidata_node_pat: str = r'(?:Q{nonzero_digit}{digit}*)'.format(nonzero_digit=nonzero_digit_pat,
+    wikidata_node_pat: str = r'(?P<wikidata_node>Q{nonzero_digit}{digit}*)'.format(nonzero_digit=nonzero_digit_pat,
                                                                     digit=digit_pat)
 
     units_pat: str = r'(?:{si}|{wikidata_node})'.format(si=si_pat,
                                                         wikidata_node=wikidata_node_pat)
 
     # This definition matches numbers or quantities.
- number_or_quantity_pat: str = r'{numeric}{tolerance}?{units}?'.format(numeric=numeric_pat, + number_or_quantity_pat: str = r'{numeric}{tolerance}?{units}?'.format(numeric=number_pat, tolerance=tolerance_pat, units=units_pat) - # This definition for quantity excludes plain numbers. - quantity_pat: str = r'{numeric}(?:(?:{tolerance}{units}?)|{units})'.format(numeric=numeric_pat, - tolerance=tolerance_pat, - units=units_pat) + # This matches numbers or quantities. number_or_quantity_re: typing.Pattern = re.compile(r'^' + number_or_quantity_pat + r'$') # This matches numbers but not quantities. - number_re: typing.Pattern = re.compile(r'^' + numeric_pat + r'$') - - # This matches quantities excluding numbers. - quantity_re: typing.Pattern = re.compile(r'^' + quantity_pat + r'$') + number_re: typing.Pattern = re.compile(r'^' + number_pat + r'$') def is_number_or_quantity(self, validate: bool=False)->bool: """ @@ -201,16 +219,25 @@ def is_number_or_quantity(self, validate: bool=False)->bool: # If we know the specific data type, delegate the test to that data type. if self.data_type is not None: if self.data_type == KgtkFormat.DataType.NUMBER: - if not validate: - return True return self.is_number(validate=validate) elif self.data_type == KgtkFormat.DataType.QUANTITY: - if not validate: - return True return self.is_quantity(validate=validate) else: + # Clear the number or quantity components: + self.number = None + self.low_tolerance = None + self.high_tolerance = None + self.si_units = None + self.wikidata_node = None return False # Not a number or quantity. + # Clear the number or quantity components: + self.number = None + self.low_tolerance = None + self.high_tolerance = None + self.si_units = None + self.wikidata_node = None + if not self._is_number_or_quantity(): return False @@ -221,7 +248,25 @@ def is_number_or_quantity(self, validate: bool=False)->bool: # if we later determined the exact data type. We could work around # this problem with more thought. m: typing.Optional[typing.Match] = KgtkValue.number_or_quantity_re.match(self.value) - return m is not None + if m is None: + return False + + # Extract the number or quantity components: + self.number = m.group("number") + self.low_tolerance = m.group("low_tolerance") + self.high_tolerance = m.group("high_tolerance") + self.si_units = m.group("si_units") + self.wikidata_node = m.group("wikidata_node") + + if self.low_tolerance is not None or self.high_tolerance is not None or self.si_units is not None or self.wikidata_node is not None: + # We can be certain that this is a quantity. + self.data_type = KgtkFormat.DataType.QUANTITY + else: + # We can be certain that this is a number + self.data_type = KgtkFormat.DataType.NUMBER + + self.valid = True + return True def is_number(self, validate: bool=False)->bool: """ @@ -244,12 +289,18 @@ def is_number(self, validate: bool=False)->bool: """ if self.data_type is not None: if self.data_type != KgtkFormat.DataType.NUMBER: + # Clear the number components: + self.number = None return False + if not validate: return True if self.valid is not None: return self.valid + # Clear the number components: + self.number = None + if not self._is_number_or_quantity(): return False # We don't know yet if this is a number. It could be a quantity. @@ -258,6 +309,9 @@ def is_number(self, validate: bool=False)->bool: if m is None: return False + # Extract the number components: + self.number = m.group("number") + # Now we can be certain that this is a number. 
self.data_type = KgtkFormat.DataType.NUMBER self.valid = True @@ -271,27 +325,54 @@ def is_quantity(self, validate: bool=False)->bool: """ if self.data_type is not None: if self.data_type != KgtkFormat.DataType.QUANTITY: + # Clear the quantity components: + self.number = None + self.low_tolerance = None + self.high_tolerance = None + self.si_units = None + self.wikidata_node = None return False + if not validate: return True if self.valid is not None: return self.valid + # Clear the quantity components: + self.number = None + self.low_tolerance = None + self.high_tolerance = None + self.si_units = None + self.wikidata_node = None + if not self._is_number_or_quantity(): return False # We don't know yet if this is a quantity. It could be a number. - m: typing.Optional[typing.Match] = KgtkValue.quantity_re.match(self.value) + m: typing.Optional[typing.Match] = KgtkValue.number_or_quantity_re.match(self.value) if m is None: return False + # Extract the quantity components: + self.number = m.group("number") + self.low_tolerance = m.group("low_tolerance") + self.high_tolerance = m.group("high_tolerance") + self.si_units = m.group("si_units") + self.wikidata_node = m.group("wikidata_node") + + if self.low_tolerance is None and self.high_tolerance is None and self.si_units is None and self.wikidata_node is None: + # This is a number, not a quantity + self.data_type = KgtkFormat.DataType.NUMBER + self.valid = True + return False + # Now we can be certain that this is a quantity. self.data_type = KgtkFormat.DataType.QUANTITY self.valid = True return True - lax_string_re: typing.Pattern = re.compile(r'^".*"$') - strict_string_re: typing.Pattern = re.compile(r'^"(?:[^"\\]|\\.)*"$') + lax_string_re: typing.Pattern = re.compile(r'^"(?P.*)"$') + strict_string_re: typing.Pattern = re.compile(r'^"(?P(?:[^"\\]|\\.)*"$)') def is_string(self, validate: bool = False)->bool: """ @@ -304,11 +385,15 @@ def is_string(self, validate: bool = False)->bool: """ if self.data_type is None: if not self.value.startswith('"'): + # Clear the string components: + self.contents = None return False # We are certain this is a string. We don't yet know if it is valid. self.data_type = KgtkFormat.DataType.STRING else: if self.data_type != KgtkFormat.DataType.STRING: + # Clear the string components: + self.contents = None return False if not validate: @@ -316,6 +401,9 @@ def is_string(self, validate: bool = False)->bool: if self.valid is not None: return self.valid + # Clear the string components: + self.contents = None + # Validate the string: m: typing.Optional[typing.Match] if self.options.allow_lax_strings: @@ -325,6 +413,9 @@ def is_string(self, validate: bool = False)->bool: if m is None: return False + # Extract the contents components: + self.contents = m.group("contents") + # We are certain that this is a valid string. self.valid = True return True @@ -373,25 +464,28 @@ def is_boolean(self, validate: bool = False)->bool: # Support two or three character language codes. Suports hyphenated codes # with a country code or dialect namesuffix after the language code. 
- lax_language_qualified_string_re: typing.Pattern = re.compile(r"^(?P'.*')@(?P[a-zA-Z]{2,3}(?P-[a-zA-Z]+)?)$") - strict_language_qualified_string_re: typing.Pattern = re.compile(r"^(?P'(?:[^'\\]|\\.)*')@(?P(?P[a-zA-Z]{2,3})(?P-[a-zA-Z]+)?)$") + lax_language_qualified_string_re: typing.Pattern = re.compile(r"^'(?P.*)'@(?P[a-zA-Z]{2,3}(?P-[a-zA-Z]+)?)$") + strict_language_qualified_string_re: typing.Pattern = re.compile(r"^'(?P(?:[^'\\]|\\.)*)'@(?P(?P[a-zA-Z]{2,3})(?P-[a-zA-Z]+)?)$") def is_language_qualified_string(self, validate: bool=False)->bool: """ Return True if the value looks like a language-qualified string. """ - # Clear the cached components lf the lanjguage qualified string: - self.string = None - self.lang = None - self.suffix = None - if self.data_type is None: if not self.value.startswith("'"): + # Clear the cached components of the language qualified string: + self.contents = None + self.lang = None + self.suffix = None return False # We are certain that this is a language qualified string, although we haven't checked validity. self.data_type = KgtkFormat.DataType.LANGUAGE_QUALIFIED_STRING else: if self.data_type != KgtkFormat.DataType.LANGUAGE_QUALIFIED_STRING: + # Clear the cached components of the language qualified string: + self.contents = None + self.lang = None + self.suffix = None return False if not validate: @@ -399,6 +493,11 @@ def is_language_qualified_string(self, validate: bool=False)->bool: if self.valid is not None: return self.valid + # Clear the cached components of the language qualified string: + self.contents = None + self.lang = None + self.suffix = None + # Validate the language qualified string. # print("checking %s" % self.value) m: typing.Optional[typing.Match] @@ -410,8 +509,8 @@ def is_language_qualified_string(self, validate: bool=False)->bool: # print("match failed for %s" % self.value) return False - # Extract the string, lang, and optional suffix components: - self.string = m.group("string") + # Extract the contents, lang, and optional suffix components: + self.contents = m.group("contents") self.lang = m.group("lang") self.suffix = m.group("suffix") @@ -441,11 +540,19 @@ def is_location_coordinates(self, validate: bool=False)->bool: """ if self.data_type is None: if not self.value.startswith("@"): + self.latstr = None + self.lat = None + self.lonstr = None + self.lon = None return False # We are certain that this is location coordinates, although we haven't checked validity. 
self.data_type = KgtkFormat.DataType.LOCATION_COORDINATES else: if self.data_type != KgtkFormat.DataType.LOCATION_COORDINATES: + self.latstr = None + self.lat = None + self.lonstr = None + self.lon = None return False if not validate: @@ -453,25 +560,34 @@ def is_location_coordinates(self, validate: bool=False)->bool: if self.valid is not None: return self.valid + # Clear the lat/lon components: + self.latstr = None + self.lat = None + self.lonstr = None + self.lon = None + # Validate the location coordinates: m: typing.Optional[typing.Match] = KgtkValue.location_coordinates_re.match(self.value) if m is None: return False - # Latitude normally runs from -90 to +90: latstr: str = m.group("lat") + self.latstr = latstr + lonstr: str = m.group("lon") + self.lonstr = lonstr + + # Latitude normally runs from -90 to +90: try: - lat: float = float(latstr) - if lat < self.options.minimum_valid_lat or lat > self.options.maximum_valid_lat: + self.lat = float(latstr) + if self.lat < self.options.minimum_valid_lat or self.lat > self.options.maximum_valid_lat: return False except ValueError: return False # Longitude normally runs from -180 to +180: - lonstr: str = m.group("lon") try: - lon: float = float(lonstr) - if lon < self.options.minimum_valid_lon or lon > self.options.maximum_valid_lon: + self.lon = float(lonstr) + if self.lon < self.options.minimum_valid_lon or self.lon > self.options.maximum_valid_lon: return False except ValueError: return False @@ -500,7 +616,7 @@ def is_location_coordinates(self, validate: bool=False)->bool: # hour-minutes-seconds hour_pat: str = r'(?P2[0-3]|[01][0-9])' minutes_pat: str = r'(?P[0-5][0-9])' - seconds_pat: str = r'(?P[0-5][0-9])' + seconds_pat: str = r'(?P[0-5][0-9])' # NOTE: It might be the case that the ":" before the minutes in the time zone pattern # should be conditioned upon the hyphen indicator. The Wikipedia article doesn't @@ -573,11 +689,25 @@ def is_date_and_times(self, validate: bool=False)->bool: """ if self.data_type is None: if not self.value.startswith("^"): + # Clear the cached date and times components: + self.yearstr = None + self.monthstr = None + self.daystr = None + self.hourstr = None + self.minutesstr = None + self.secondsstr = None return False # We are certain that this is location coordinates, although we haven't checked validity. 
self.data_type = KgtkFormat.DataType.DATE_AND_TIMES else: if self.data_type != KgtkFormat.DataType.DATE_AND_TIMES: + # Clear the cached date and times components: + self.yearstr = None + self.monthstr = None + self.daystr = None + self.hourstr = None + self.minutesstr = None + self.secondsstr = None return False if not validate: @@ -585,13 +715,33 @@ def is_date_and_times(self, validate: bool=False)->bool: if self.valid is not None: return self.valid + # Clear the cached date and times components: + self.yearstr = None + self.monthstr = None + self.daystr = None + self.hourstr = None + self.minutesstr = None + self.secondsstr = None + # Validate the date and times: m: typing.Optional[typing.Match] = KgtkValue.lax_date_and_times_re.match(self.value) if m is None: return False - # Validate the year: year_str: str = m.group("year") + self.yearstr = year_str + month_str: str = m.group("month") + self.monthstr = month_str + day_str: str = m.group("day") + self.daystr = day_str + hour_str: str = m.group("hour") + self.hourstr = hour_str + minutes_str: str = m.group("minutes") + self.minutesstr = minutes_str + seconds_str: str = m.group("seconds") + self.secondsstr = seconds_str + + # Validate the year: if year_str is None or len(year_str) == 0: return False # Years are mandatory try: @@ -603,7 +753,6 @@ def is_date_and_times(self, validate: bool=False)->bool: if year > self.options.maximum_valid_year: return False - month_str: str = m.group("month") if month_str is not None: try: month: int = int(month_str) @@ -612,7 +761,6 @@ def is_date_and_times(self, validate: bool=False)->bool: if month == 0 and not self.options.allow_month_or_day_zero: return False # month 0 was disallowed. - day_str: str = m.group("day") if day_str is not None: try: day: int = int(day_str) @@ -686,6 +834,12 @@ def classify(self)->KgtkFormat.DataType: # Shouldn't get here. raise ValueError("Unknown data type for '%s'" % self.value) + def reclassify(self)->KgtkFormat.DataType: + # Classify this KgtkValue into a KgtkDataType, ignoring any cached data_type. + self.data_type = None + self.valid = None + return self.classify() + def validate(self)->bool: # Validate this KgtkValue. @@ -722,6 +876,12 @@ def validate(self)->bool: else: raise ValueError("Unrecognized DataType.") + def revalidate(self, reclassify: bool=False)->bool: + # Revalidate this KgtkValue after clearing cached values. + if reclassify: + self.data_type = None + self.valid = None + return self.validate() def describe(self)->str: """ From b768f44ae59d4937867ea836cdfbeb2ac022b72b Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Tue, 5 May 2020 19:45:59 -0700 Subject: [PATCH 079/278] Make year/month//day available as ints. 
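This patch caches the integer forms next to the already-cached strings, so
callers no longer have to re-parse yearstr/monthstr/daystr. A minimal sketch
of the access pattern it enables, assuming the default options accept the
(hypothetical) literal below; the string fields keep their zero-padding while
the new int fields support arithmetic:

from kgtk.join.kgtkvalue import KgtkValue

kv = KgtkValue("^1960-11-05T00:00:00Z/9")
if kv.is_date_and_times(validate=True):
    print(kv.yearstr, kv.monthstr, kv.daystr)  # 1960 11 05
    print(kv.year + 1, kv.month, kv.day)       # 1961 11 5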
--- kgtk/join/kgtkvalue.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index 574f68da0..24bf3e890 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -46,8 +46,11 @@ class KgtkValue(KgtkFormat): # Offer the components of a date and times: yearstr: typing.Optional[str] = None # Note: not converted to int + year: typing.Optional[int] = None monthstr: typing.Optional[str] = None # Note: not converted to int + month: typing.Optional[int] = None daystr: typing.Optional[str] = None # Note: not converted to int + day: typing.Optional[int] = None hourstr: typing.Optional[str] = None # Note: not converted to int or float minutesstr: typing.Optional[str] = None # Note: not converted to int or float secondsstr: typing.Optional[str] = None # Note: not converted to int or float @@ -696,6 +699,9 @@ def is_date_and_times(self, validate: bool=False)->bool: self.hourstr = None self.minutesstr = None self.secondsstr = None + self.year = None + self.month = None + self.day = None return False # We are certain that this is location coordinates, although we haven't checked validity. self.data_type = KgtkFormat.DataType.DATE_AND_TIMES @@ -708,6 +714,9 @@ def is_date_and_times(self, validate: bool=False)->bool: self.hourstr = None self.minutesstr = None self.secondsstr = None + self.year = None + self.month = None + self.day = None return False if not validate: @@ -722,6 +731,9 @@ def is_date_and_times(self, validate: bool=False)->bool: self.hourstr = None self.minutesstr = None self.secondsstr = None + self.year = None + self.month = None + self.day = None # Validate the date and times: m: typing.Optional[typing.Match] = KgtkValue.lax_date_and_times_re.match(self.value) @@ -745,28 +757,28 @@ def is_date_and_times(self, validate: bool=False)->bool: if year_str is None or len(year_str) == 0: return False # Years are mandatory try: - year: int = int(year_str) + self.year: int = int(year_str) except ValueError: return False - if year < self.options.minimum_valid_year: + if self.year < self.options.minimum_valid_year: return False - if year > self.options.maximum_valid_year: + if self.year > self.options.maximum_valid_year: return False if month_str is not None: try: - month: int = int(month_str) + self.month: int = int(month_str) except ValueError: return False # shouldn't happen - if month == 0 and not self.options.allow_month_or_day_zero: + if self.month == 0 and not self.options.allow_month_or_day_zero: return False # month 0 was disallowed. if day_str is not None: try: - day: int = int(day_str) + self.day: int = int(day_str) except ValueError: return False # shouldn't happen - if day == 0 and not self.options.allow_month_or_day_zero: + if self.day == 0 and not self.options.allow_month_or_day_zero: return False # day 0 was disallowed. # We are fairly certain that this is a valid date and times. From 02a5fbb35b09f0b6f8e4d51c5be57719ead9c16c Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Tue, 5 May 2020 19:51:35 -0700 Subject: [PATCH 080/278] Incicate whether hyphens/colons were present in date/times. 
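The new flag records which ISO 8601 form was parsed: basic (^19601105, no
separators) or extended (^1960-11-05). A standalone sketch of the same
hyphen-group test, using a simplified, hypothetical pattern rather than the
full lax_date_and_times_re:

import re

date_re = re.compile(r"^\^(?P<year>\d{4})(?P<hyphen>-)?(?P<month>\d{2})(?(hyphen)-)(?P<day>\d{2})$")

for v in ("^1960-11-05", "^19601105"):
    m = date_re.match(v)
    print(v, "extended" if m.group("hyphen") else "basic")
# ^1960-11-05 extended
# ^19601105 basic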
--- kgtk/join/kgtkvalue.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index 24bf3e890..5f5e486b2 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -54,6 +54,7 @@ class KgtkValue(KgtkFormat): hourstr: typing.Optional[str] = None # Note: not converted to int or float minutesstr: typing.Optional[str] = None # Note: not converted to int or float secondsstr: typing.Optional[str] = None # Note: not converted to int or float + iso8601basic: typing.Optional[bool] = None # True when hyphens/colons present. def is_valid(self)->bool: # Is this a valid whatever it is? @@ -702,6 +703,7 @@ def is_date_and_times(self, validate: bool=False)->bool: self.year = None self.month = None self.day = None + self.iso8601basic = None return False # We are certain that this is location coordinates, although we haven't checked validity. self.data_type = KgtkFormat.DataType.DATE_AND_TIMES @@ -717,6 +719,7 @@ def is_date_and_times(self, validate: bool=False)->bool: self.year = None self.month = None self.day = None + self.iso8601basic = None return False if not validate: @@ -733,6 +736,7 @@ def is_date_and_times(self, validate: bool=False)->bool: self.secondsstr = None self.year = None self.month = None + self.iso8601basic = None self.day = None # Validate the date and times: @@ -752,6 +756,7 @@ def is_date_and_times(self, validate: bool=False)->bool: self.minutesstr = minutes_str seconds_str: str = m.group("seconds") self.secondsstr = seconds_str + self.iso8601basic = m.group("hyphen") is None # Validate the year: if year_str is None or len(year_str) == 0: From ed62331204a68b20e0164201e9c13ca52de7ab27 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Tue, 5 May 2020 19:57:27 -0700 Subject: [PATCH 081/278] DOn't duplucate year/month/day/... strings. 
--- kgtk/join/kgtkvalue.py | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index 5f5e486b2..0b3e62e21 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -744,25 +744,19 @@ def is_date_and_times(self, validate: bool=False)->bool: if m is None: return False - year_str: str = m.group("year") - self.yearstr = year_str - month_str: str = m.group("month") - self.monthstr = month_str - day_str: str = m.group("day") - self.daystr = day_str - hour_str: str = m.group("hour") - self.hourstr = hour_str - minutes_str: str = m.group("minutes") - self.minutesstr = minutes_str - seconds_str: str = m.group("seconds") - self.secondsstr = seconds_str + self.yearstr = m.group("year") + self.monthstr = m.group("month") + self.daystr = m.group("day") + self.hourstr = m.group("hour") + self.minutesstr = m.group("minutes") + self.secondsstr = m.group("seconds") self.iso8601basic = m.group("hyphen") is None # Validate the year: - if year_str is None or len(year_str) == 0: + if self.yearstr is None or len(self.yearstr) == 0: return False # Years are mandatory try: - self.year: int = int(year_str) + self.year: int = int(self.yearstr) except ValueError: return False if self.year < self.options.minimum_valid_year: @@ -770,17 +764,17 @@ def is_date_and_times(self, validate: bool=False)->bool: if self.year > self.options.maximum_valid_year: return False - if month_str is not None: + if self.monthstr is not None: try: - self.month: int = int(month_str) + self.month: int = int(self.monthstr) except ValueError: return False # shouldn't happen if self.month == 0 and not self.options.allow_month_or_day_zero: return False # month 0 was disallowed. - if day_str is not None: + if self.daystr is not None: try: - self.day: int = int(day_str) + self.day: int = int(self.daystr) except ValueError: return False # shouldn't happen if self.day == 0 and not self.options.allow_month_or_day_zero: From 1fef82bed3d929c45a3840cc808cea335f9c7fbb Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Tue, 5 May 2020 20:23:51 -0700 Subject: [PATCH 082/278] Initial date and time repair hack.: --- kgtk/join/kgtkvalue.py | 70 ++++++++++++++++++++++++++++++++--- kgtk/join/kgtkvalueoptions.py | 1 + 2 files changed, 66 insertions(+), 5 deletions(-) diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index 0b3e62e21..4604f6f87 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -54,6 +54,8 @@ class KgtkValue(KgtkFormat): hourstr: typing.Optional[str] = None # Note: not converted to int or float minutesstr: typing.Optional[str] = None # Note: not converted to int or float secondsstr: typing.Optional[str] = None # Note: not converted to int or float + zonestr: typing.Optional[str] = None + precisionstr: typing.Optional[str] = None iso8601basic: typing.Optional[bool] = None # True when hyphens/colons present. def is_valid(self)->bool: @@ -703,6 +705,8 @@ def is_date_and_times(self, validate: bool=False)->bool: self.year = None self.month = None self.day = None + self.zonestr = None + self.precisionstr = None self.iso8601basic = None return False # We are certain that this is location coordinates, although we haven't checked validity. 
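# A standalone sketch (not part of this patch) of the two extra components
# captured above, using a simplified, hypothetical stand-in for
# lax_date_and_times_re; the zone is Z or a [-+] offset, the precision is
# the digits after the "/":
import re

dt_re = re.compile(r"^\^(?P<date>[0-9-]+)(?:T(?P<time>[0-9:]+)(?P<zone>Z|[-+][0-9:]+)?)?(?:/(?P<precision>[0-9]+))?$")
m = dt_re.match("^1960-11-05T00:00:00-08:00/11")
print(m.group("zone"), m.group("precision"))  # -08:00 11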
@@ -719,6 +723,8 @@ def is_date_and_times(self, validate: bool=False)->bool: self.year = None self.month = None self.day = None + self.zonestr = None + self.precisionstr = None self.iso8601basic = None return False @@ -736,6 +742,8 @@ def is_date_and_times(self, validate: bool=False)->bool: self.secondsstr = None self.year = None self.month = None + self.zonestr = None + self.precisionstr = None self.iso8601basic = None self.day = None @@ -750,8 +758,12 @@ def is_date_and_times(self, validate: bool=False)->bool: self.hourstr = m.group("hour") self.minutesstr = m.group("minutes") self.secondsstr = m.group("seconds") + self.zonestr = m.group("zone") + self.precisionstr = m.group("precision") self.iso8601basic = m.group("hyphen") is None + fixup_needed: bool = False + # Validate the year: if self.yearstr is None or len(self.yearstr) == 0: return False # Years are mandatory @@ -769,21 +781,62 @@ def is_date_and_times(self, validate: bool=False)->bool: self.month: int = int(self.monthstr) except ValueError: return False # shouldn't happen - if self.month == 0 and not self.options.allow_month_or_day_zero: - return False # month 0 was disallowed. + if self.month == 0: + if self.options.repair_month_or_day_zero: + self.month = 1 + self.monthstr = "01" + fixup_needed = True + elif not self.options.allow_month_or_day_zero: + return False # month 0 was disallowed. if self.daystr is not None: try: self.day: int = int(self.daystr) except ValueError: return False # shouldn't happen - if self.day == 0 and not self.options.allow_month_or_day_zero: - return False # day 0 was disallowed. + if self.day == 0: + if self.options.repair_month_or_day_zero: + self.day = 1 + self.daystr = "01" + fixup_needed = True + if not self.options.allow_month_or_day_zero: + return False # day 0 was disallowed. + + if fixup_needed: + self.update_date_and_times() # We are fairly certain that this is a valid date and times. self.valid = True return True + def update_date_and_times(self): + v: str = "^" + self.yearstr + if self.monthstr is not None: + if not self.iso8601basic: + v += "-" + v += self.monthstr + if self.daystr is not None: + if not self.iso8601basic: + v += "-" + v += self.daystr + if self.hourstr is not None: + v += "T" + v += self.hourstr + if self.minutesstr is not None: + if not self.iso8601basic: + v += ":" + v += self.minutesstr + if self.secondsstr is not None: + if not self.iso8601basic: + v += ":" + v += self.secondssr + if self.zonestr is not None: + v += self.zonestr + if self.precisionstr is not None: + v += "/" + v += self.precisionstr + self.value = v + def is_extension(self, validate=False)->bool: """Return True if the first character is ! @@ -949,7 +1002,14 @@ def main(): value: str for value in args.values: - print("%s: %s" % (value, KgtkValue(value, options=value_options).describe()), flush=True) + kv: KgtkValue = KgtkValue(value, options=value_options) + kv.validate() + nv: str = kv.value + if value == nv: + print("%s: %s" % (value, kv.describe()), flush=True) + else: + print("%s => %s: %s" % (value, nv, kv.describe()), flush=True) + if __name__ == "__main__": main() diff --git a/kgtk/join/kgtkvalueoptions.py b/kgtk/join/kgtkvalueoptions.py index 19252f3d9..d576f649c 100644 --- a/kgtk/join/kgtkvalueoptions.py +++ b/kgtk/join/kgtkvalueoptions.py @@ -16,6 +16,7 @@ class KgtkValueOptions: # Allow month 00 or day 00 in dates? This isn't really allowed by ISO # 8601, but appears in wikidata. 
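# A standalone sketch (not part of this patch) of the repair idea: the real
# code fixes the cached month/day fields and reassembles self.value in
# update_date_and_times(); this simplified regex rewrite only shows the
# intent:
import re

def repair_month_or_day_zero(value: str) -> str:
    # "^1960-00-00" occurs in Wikidata dumps; ISO 8601 has no month/day zero.
    return re.sub(r"(?<=-)00(?=-|T|$)", "01", value)

print(repair_month_or_day_zero("^1960-00-00"))  # ^1960-01-01
print(repair_month_or_day_zero("^1960-11-00"))  # ^1960-11-01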
allow_month_or_day_zero: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) + repair_month_or_day_zero: bool = attr.ib(validator=attr.validators.instance_of(bool), default=True) # When allow_lax_strings is true, strings will be checked to see if they # start and end with double quote ("), but we won't check if internal From f3acd680168915a9ba483c94a7dabc0c12f07810 Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Wed, 6 May 2020 08:57:15 -0700 Subject: [PATCH 083/278] remove line-by-line options --- kgtk/cli/generate_wikidata_triples.py | 34 +----- kgtk/triple_generator.py | 147 ++++++++++++++------------ 2 files changed, 85 insertions(+), 96 deletions(-) diff --git a/kgtk/cli/generate_wikidata_triples.py b/kgtk/cli/generate_wikidata_triples.py index c87495434..d65aa9037 100644 --- a/kgtk/cli/generate_wikidata_triples.py +++ b/kgtk/cli/generate_wikidata_triples.py @@ -97,14 +97,6 @@ def add_arguments(parser): help="if set to yes, read from compressed gz file", dest="use_gz", ) - parser.add_argument( - "-lbl", - "--line-by-line", - action="store", - type=str2bool, - help="if set to yes, read from standard input line by line, otherwise loads whole file into memory", - dest="line_by_line", - ) def run( @@ -116,7 +108,6 @@ def run( truthy: bool, ignore: bool, use_gz: bool, - line_by_line: bool, ): # import modules locally import gzip @@ -136,27 +127,12 @@ def run( fp = gzip.open(sys.stdin.buffer, 'rt') else: fp = sys.stdin - if line_by_line: - print("#line-by-line") - num_line = 1 - while True: - edge = fp.readline() - if not edge: - break - if edge.startswith("#") or num_line == 1: # TODO First line omit - num_line += 1 - continue - else: - generator.entry_point(num_line, edge) - num_line += 1 - else: # not line by line - print("#not line-by-line") - for num, edge in enumerate(fp.readlines()): - if edge.startswith("#") or num == 0: - continue - else: - generator.entry_point(num+1,edge) + for num, edge in enumerate(fp): + if edge.startswith("#") or num == 0: + continue + else: + generator.entry_point(num+1,edge) generator.finalize() # testing profiling locally with direct call diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py index 57a32907b..67e3485cd 100644 --- a/kgtk/triple_generator.py +++ b/kgtk/triple_generator.py @@ -8,24 +8,27 @@ from etk.knowledge_graph import KGSchema from etk.wikidata import wiki_namespaces import rfc3986 -from etk.wikidata.value import ( -Precision, -Item, -StringValue, -TimeValue, -QuantityValue, -MonolingualText, -GlobeCoordinate, -ExternalIdentifier, -URLValue +from etk.wikidata.value import ( + Precision, + Item, + StringValue, + TimeValue, + QuantityValue, + MonolingualText, + GlobeCoordinate, + ExternalIdentifier, + URLValue ) BAD_CHARS = [":", "-", "&", ",", " ", "(", ")", "\'", '\"', "/", "\\", "[", "]", ";"] + + class TripleGenerator: """ A class to maintain the status of the generator """ + def __init__( self, prop_file: str, @@ -35,7 +38,7 @@ def __init__( ignore: bool, n: int, dest_fp: TextIO = sys.stdout, - truthy:bool =False + truthy: bool = False ): from etk.wikidata.statement import Rank self.ignore = ignore @@ -48,19 +51,20 @@ def __init__( self.read_num_of_lines = 0 # ignore-logging, if not ignore, log them and move on. 
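# A standalone sketch (not part of this patch) of the streaming loop that
# replaced the line-by-line / readlines() split in the cli module above:
# iterating the file object directly reads one line at a time and never
# loads the whole (possibly gzipped) input into memory.
import sys

def stream_edges(fp):
    for num, edge in enumerate(fp):
        if num == 0 or edge.startswith("#"):
            continue  # skip the header row and comment lines
        yield num + 1, edge  # 1-based line numbers, as entry_point expects

# usage: for line_num, edge in stream_edges(sys.stdin): generator.entry_point(line_num, edge)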
if not self.ignore: - self.ignore_file = open("ignored.log","w") + self.ignore_file = open("ignored.log", "w") # corrupted statement id self.corrupted_statement_id = None # truthy - self.truthy = truthy + self.truthy = truthy self.reset_etk_doc() self.serialize_prefix() - self.yyyy_mm_dd_pattern = re.compile("[12]\d{3}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])") + self.yyyy_mm_dd_pattern = re.compile( + "[12]\d{3}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])") self.yyyy_pattern = re.compile("[12]\d{3}") - self.quantity_pattern = re.compile("([\+|\-]?[0-9]+\.?[0-9]*)(?:\[([\+|\-]?[0-9]+\.?[0-9]*),([\+|\-]?[0-9]+\.?[0-9]*)\])?([U|Q](?:[0-9]+))?") + self.quantity_pattern = re.compile( + "([\+|\-]?[0-9]+\.?[0-9]*)(?:\[([\+|\-]?[0-9]+\.?[0-9]*),([\+|\-]?[0-9]+\.?[0-9]*)\])?([U|Q](?:[0-9]+))?") - - def _node_2_entity(self, node:str): + def _node_2_entity(self, node: str): ''' A node can be Qxxx or Pxxx, return the proper entity. ''' @@ -70,7 +74,6 @@ def _node_2_entity(self, node:str): entity = WDItem(TripleGenerator.replace_illegal_string(node)) return entity - def set_properties(self, prop_file: str): datatype_mapping = { "item": Item, @@ -79,8 +82,8 @@ def set_properties(self, prop_file: str): "quantity": QuantityValue, "monolingualtext": MonolingualText, "string": StringValue, - "external-identifier":ExternalIdentifier, - "url":URLValue + "external-identifier": ExternalIdentifier, + "url": URLValue } with open(prop_file, "r") as fp: props = fp.readlines() @@ -90,7 +93,7 @@ def set_properties(self, prop_file: str): try: prop_types[node1] = datatype_mapping[node2.strip()] except: - if not self.ignore: + if not self.ignore: raise KGTKException( "DataType {} of node {} is not supported.\n".format( node2, node1 @@ -114,14 +117,15 @@ def reset_etk_doc(self, doc_id: str = "http://isi.edu/default-ns/projects"): self.etk = ETK(kg_schema=kg_schema, modules=ETKModule) self.doc = self.etk.create_document({}, doc_id=doc_id) for k, v in wiki_namespaces.items(): - self.doc.kg.bind(k, v) - + self.doc.kg.bind(k, v) + def serialize(self): """ Seriealize the triples. Used a hack to avoid serializing the prefix again. """ docs = self.etk.process_ems(self.doc) - self.fp.write("\n\n".join(docs[0].kg.serialize("ttl").split("\n\n")[1:])) + self.fp.write("\n\n".join( + docs[0].kg.serialize("ttl").split("\n\n")[1:])) self.fp.flush() self.reset() @@ -132,7 +136,7 @@ def serialize_prefix(self): Relevent issue: https://github.com/RDFLib/rdflib/issues/965 """ for k, v in wiki_namespaces.items(): - line = "@prefix " + k + ": <" + v + "> .\n" + line = "@prefix " + k + ": <" + v + "> .\n" self.fp.write(line) self.fp.write("\n") self.fp.flush() @@ -148,18 +152,18 @@ def finalize(self): self.serialize() @staticmethod - def process_text_string(string:str)->[str,str]: + def process_text_string(string: str) -> [str, str]: ''' Language detection is removed from triple generation. 
The user is responsible for detect the language ''' - if len(string)==0: - return ["","en"] + if len(string) == 0: + return ["", "en"] if "@" in string: res = string.split("@") text_string = "@".join(res[:-1]).replace('"', "").replace("'", "") - lang = res[-1].replace('"','').replace("'","") + lang = res[-1].replace('"', '').replace("'", "") if len(lang) > 2: - lang ="en" + lang = "en" else: text_string = string.replace('"', "").replace("'", "") lang = "en" @@ -192,7 +196,7 @@ def generate_prop_declaration_triple(self, node1: str, label: str, node2: str) - return True def generate_normal_triple( - self, node1: str, label: str, node2: str, is_qualifier_edge: bool) -> bool: + self, node1: str, label: str, node2: str, is_qualifier_edge: bool) -> bool: entity = self._node_2_entity(node1) # determine the edge type edge_type = self.prop_types[label] @@ -206,7 +210,7 @@ def generate_normal_triple( try: dateTimeString = node2 object = TimeValue( - value=dateTimeString, #TODO + value=dateTimeString, # TODO calendar=Item("Q1985727"), precision=Precision.year, time_zone=0, @@ -214,10 +218,10 @@ def generate_normal_triple( except: return False elif self.yyyy_pattern.match(node2): - try: + try: dateTimeString = node2 + "-01-01" object = TimeValue( - value=dateTimeString, #TODO + value=dateTimeString, # TODO calendar=Item("Q1985727"), precision=Precision.year, time_zone=0, @@ -227,23 +231,24 @@ def generate_normal_triple( else: try: dateTimeString, precision = node2[1:].split("/") - dateTimeString = dateTimeString[:-1] # remove "Z" + dateTimeString = dateTimeString[:-1] # remove "Z" # 2016-00-00T00:00:00 case if "-00-00" in dateTimeString: - dateTimeString = "-01-01".join(dateTimeString.split("-00-00")) + dateTimeString = "-01-01".join( + dateTimeString.split("-00-00")) elif dateTimeString[8:10] == "00": - dateTimeString = dateTimeString[:8]+"01" + dateTimeString[10:] + dateTimeString = dateTimeString[:8] + \ + "01" + dateTimeString[10:] object = TimeValue( value=dateTimeString, calendar=Item("Q1985727"), precision=precision, time_zone=0, ) - except: + except: return False - #TODO other than that, not supported. Creation of normal triple fails - + # TODO other than that, not supported. Creation of normal triple fails elif edge_type == GlobeCoordinate: latitude, longitude = node2[1:].split("/") @@ -264,14 +269,16 @@ def generate_normal_triple( upper_bound = TripleGenerator.clean_number_string(upper_bound) if unit != None: if upper_bound != None and lower_bound != None: - object = QuantityValue(amount, unit=Item(unit),upper_bound=upper_bound,lower_bound=lower_bound) + object = QuantityValue(amount, unit=Item( + unit), upper_bound=upper_bound, lower_bound=lower_bound) else: object = QuantityValue(amount, unit=Item(unit)) else: if upper_bound != None and lower_bound != None: - object = QuantityValue(amount, upper_bound=upper_bound,lower_bound=lower_bound) + object = QuantityValue( + amount, upper_bound=upper_bound, lower_bound=lower_bound) else: - object = QuantityValue(amount) + object = QuantityValue(amount) elif edge_type == MonolingualText: text_string, lang = TripleGenerator.process_text_string(node2) object = MonolingualText(text_string, lang) @@ -291,7 +298,8 @@ def generate_normal_triple( if type(object) == WDItem: self.doc.kg.add_subject(object) self.to_append_statement.add_qualifier(label, object) - self.doc.kg.add_subject(self.to_append_statement) #TODO maybe can be positioned better for the edge cases. + # TODO maybe can be positioned better for the edge cases. 
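# A standalone sketch (not part of this patch) of quantity_pattern from
# __init__ above, applied to a hypothetical amount with a tolerance interval
# and a Wikidata unit node:
import re

quantity_pattern = re.compile(
    r"([\+|\-]?[0-9]+\.?[0-9]*)(?:\[([\+|\-]?[0-9]+\.?[0-9]*),([\+|\-]?[0-9]+\.?[0-9]*)\])?([U|Q](?:[0-9]+))?")

amount, lower, upper, unit = quantity_pattern.match("12.5[12.0,13.0]Q11573").groups()
print(amount, lower, upper, unit)  # 12.5 12.0 13.0 Q11573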
+ self.doc.kg.add_subject(self.to_append_statement) else: # edge: q1 p8 q2 e8 @@ -299,12 +307,13 @@ def generate_normal_triple( if type(object) == WDItem: self.doc.kg.add_subject(object) if self.truthy: - self.to_append_statement = entity.add_truthy_statement(label, object) + self.to_append_statement = entity.add_truthy_statement( + label, object) else: - self.to_append_statement = entity.add_statement(label, object) + self.to_append_statement = entity.add_statement(label, object) self.doc.kg.add_subject(entity) return True - + @staticmethod def is_invalid_decimal_string(num_string): ''' @@ -315,18 +324,19 @@ def is_invalid_decimal_string(num_string): else: if abs(float(num_string)) < 0.0001 and float(num_string) != 0: return True - return False + return False @staticmethod - def is_valid_uri_with_scheme_and_host(uri:str): + def is_valid_uri_with_scheme_and_host(uri: str): ''' https://github.com/python-hyper/rfc3986/issues/30#issuecomment-461661883 ''' try: uri = rfc3986.URIReference.from_string(uri) - rfc3986.validators.Validator().require_presence_of("scheme", "host").check_validity_of("scheme", "host").validate(uri) + rfc3986.validators.Validator().require_presence_of( + "scheme", "host").check_validity_of("scheme", "host").validate(uri) return True - except : + except: return False @staticmethod @@ -335,9 +345,9 @@ def clean_number_string(num): if num == None: return None else: - return format_float_positional(float(num),trim="-") + return format_float_positional(float(num), trim="-") - def entry_point(self, line_number:int , edge: str): + def entry_point(self, line_number: int, edge: str): """ generates a list of two, the first element is the determination of the edge type using corresponding edge type the second element is a bool indicating whether this is a valid property edge or qualifier edge. 
@@ -345,12 +355,12 @@ def entry_point(self, line_number:int , edge: str): """ edge_list = edge.strip().split("\t") l = len(edge_list) - if l!=4: + if l != 4: return [node1, label, node2, e_id] = edge_list - node1, label, node2, e_id = node1.strip(),label.strip(),node2.strip(),e_id.strip() - if line_number == 0: #TODO ignore header mode + node1, label, node2, e_id = node1.strip(), label.strip(), node2.strip(), e_id.strip() + if line_number == 0: # TODO ignore header mode # by default a statement edge is_qualifier_edge = False # print("#Debug Info: ",line_number, self.to_append_statement_id, e_id, is_qualifier_edge,self.to_append_statement) @@ -363,10 +373,10 @@ def entry_point(self, line_number:int , edge: str): self.serialize() is_qualifier_edge = False # print("#Debug Info: ",line_number, self.to_append_statement_id, node1, is_qualifier_edge,self.to_append_statement) - self.to_append_statement_id= e_id + self.to_append_statement_id = e_id self.corrupted_statement_id = None else: - # qualifier edge or property declaration edge + # qualifier edge or property declaration edge is_qualifier_edge = True if self.corrupted_statement_id == e_id: # Met a qualifier which associates with a corrupted statement @@ -383,36 +393,39 @@ def entry_point(self, line_number:int , edge: str): if label in self.label_set: success = self.generate_label_triple(node1, label, node2) elif label in self.description_set: - success= self.generate_description_triple(node1, label, node2) + success = self.generate_description_triple(node1, label, node2) elif label in self.alias_set: success = self.generate_alias_triple(node1, label, node2) elif label == "type": # special edge of prop declaration - success = self.generate_prop_declaration_triple(node1, label, node2) + success = self.generate_prop_declaration_triple( + node1, label, node2) else: if label in self.prop_types: - success= self.generate_normal_triple(node1, label, node2, is_qualifier_edge) + success = self.generate_normal_triple( + node1, label, node2, is_qualifier_edge) else: if not self.ignore: raise KGTKException( - "property {}'s type is unknown at line {}.\n".format(label, line_number) + "property {}'s type is unknown at line {}.\n".format( + label, line_number) ) success = False if (not success) and (not is_qualifier_edge) and (not self.ignore): # We have a corrupted edge here. - self.ignore_file.write("Corrupted statement at line number: {} with id {} with current corrupted id {}\n".format(line_number, e_id, self.corrupted_statement_id)) + self.ignore_file.write("Corrupted statement at line number: {} with id {} with current corrupted id {}\n".format( + line_number, e_id, self.corrupted_statement_id)) self.ignore_file.flush() self.corrupted_statement_id = e_id else: self.read_num_of_lines += 1 self.corrupted_statement_id = None - @staticmethod - def replace_illegal_string(s:str)->str: + def replace_illegal_string(s: str) -> str: ''' this function serves as the last gate of keeping illegal characters outside of entity creation. 
''' for char in BAD_CHARS: - s = s.replace(char,"_") - return s \ No newline at end of file + s = s.replace(char, "_") + return s From 31622c819175faddf9f94f766802682b171a26dc Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Wed, 6 May 2020 09:02:19 -0700 Subject: [PATCH 084/278] added several default values for commandline arguments --- kgtk/cli/generate_wikidata_triples.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kgtk/cli/generate_wikidata_triples.py b/kgtk/cli/generate_wikidata_triples.py index d65aa9037..475885d31 100644 --- a/kgtk/cli/generate_wikidata_triples.py +++ b/kgtk/cli/generate_wikidata_triples.py @@ -38,6 +38,7 @@ def add_arguments(parser): "--label-property", action="store", type=str, + const="label", help="property identifiers which will create labels, separated by comma','.", dest="labels", ) @@ -46,6 +47,7 @@ def add_arguments(parser): "--alias-property", action="store", type=str, + const="aliases", help="alias identifiers which will create labels, separated by comma','.", dest="aliases", ) @@ -54,6 +56,7 @@ def add_arguments(parser): "--description-property", action="store", type=str, + const="descriptions", help="description identifiers which will create labels, separated by comma','.", dest="descriptions", ) @@ -70,6 +73,7 @@ def add_arguments(parser): "--output-n-lines", action="store", type=int, + const=1000, help="output triples approximately every {n} lines of reading stdin.", dest="n", ) @@ -78,6 +82,7 @@ def add_arguments(parser): "--generate-truthy", action="store", type=str2bool, + const="yes", help="the default is to not generate truthy triples. Specify this option to generate truthy triples. NOTIMPLEMENTED", dest="truthy", ) @@ -86,6 +91,7 @@ def add_arguments(parser): "--ignore", action="store", type=str2bool, + const="no", help="if set to yes, ignore various kinds of exceptions and mistakes and log them to a log file with line number in input file, rather than stopping. 
logging", dest="ignore", ) @@ -94,6 +100,7 @@ def add_arguments(parser): "--use-gz", action="store", type=str2bool, + const="no", help="if set to yes, read from compressed gz file", dest="use_gz", ) From d0c14b14280342035a228d86ea013b0b207ee738 Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Wed, 6 May 2020 09:15:35 -0700 Subject: [PATCH 085/278] fix the option issue, now only needs property file to run --- kgtk/cli/generate_wikidata_triples.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/kgtk/cli/generate_wikidata_triples.py b/kgtk/cli/generate_wikidata_triples.py index 475885d31..a9e861117 100644 --- a/kgtk/cli/generate_wikidata_triples.py +++ b/kgtk/cli/generate_wikidata_triples.py @@ -38,7 +38,8 @@ def add_arguments(parser): "--label-property", action="store", type=str, - const="label", + default="label", + required=False, help="property identifiers which will create labels, separated by comma','.", dest="labels", ) @@ -47,7 +48,8 @@ def add_arguments(parser): "--alias-property", action="store", type=str, - const="aliases", + required = False, + default="aliases", help="alias identifiers which will create labels, separated by comma','.", dest="aliases", ) @@ -56,7 +58,8 @@ def add_arguments(parser): "--description-property", action="store", type=str, - const="descriptions", + required = False, + default="descriptions", help="description identifiers which will create labels, separated by comma','.", dest="descriptions", ) @@ -65,6 +68,7 @@ def add_arguments(parser): "--property-types", action="store", type=str, + required = True, help="path to the file which contains the property datatype mapping in kgtk format.", dest="prop_file", ) @@ -73,7 +77,8 @@ def add_arguments(parser): "--output-n-lines", action="store", type=int, - const=1000, + required = False, + default=1000, help="output triples approximately every {n} lines of reading stdin.", dest="n", ) @@ -82,7 +87,8 @@ def add_arguments(parser): "--generate-truthy", action="store", type=str2bool, - const="yes", + required = False, + default="yes", help="the default is to not generate truthy triples. Specify this option to generate truthy triples. NOTIMPLEMENTED", dest="truthy", ) @@ -91,7 +97,8 @@ def add_arguments(parser): "--ignore", action="store", type=str2bool, - const="no", + required = False, + default="no", help="if set to yes, ignore various kinds of exceptions and mistakes and log them to a log file with line number in input file, rather than stopping. 
logging", dest="ignore", ) @@ -100,7 +107,8 @@ def add_arguments(parser): "--use-gz", action="store", type=str2bool, - const="no", + required = False, + default="no", help="if set to yes, read from compressed gz file", dest="use_gz", ) From fb32d6963abe3be861483e8730bfc23032f999d3 Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Wed, 6 May 2020 09:40:45 -0700 Subject: [PATCH 086/278] support arbitrary order of required columns --- kgtk/cli/generate_wikidata_triples.py | 6 +-- kgtk/triple_generator.py | 54 ++++++++++++++++++--------- 2 files changed, 40 insertions(+), 20 deletions(-) diff --git a/kgtk/cli/generate_wikidata_triples.py b/kgtk/cli/generate_wikidata_triples.py index a9e861117..f1cc8dfdc 100644 --- a/kgtk/cli/generate_wikidata_triples.py +++ b/kgtk/cli/generate_wikidata_triples.py @@ -143,11 +143,11 @@ def run( else: fp = sys.stdin # not line by line - for num, edge in enumerate(fp): - if edge.startswith("#") or num == 0: + for line_num, edge in enumerate(fp): + if edge.startswith("#"): continue else: - generator.entry_point(num+1,edge) + generator.entry_point(line_num+1,edge) generator.finalize() # testing profiling locally with direct call diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py index 67e3485cd..766154f7f 100644 --- a/kgtk/triple_generator.py +++ b/kgtk/triple_generator.py @@ -63,6 +63,8 @@ def __init__( self.yyyy_pattern = re.compile("[12]\d{3}") self.quantity_pattern = re.compile( "([\+|\-]?[0-9]+\.?[0-9]*)(?:\[([\+|\-]?[0-9]+\.?[0-9]*),([\+|\-]?[0-9]+\.?[0-9]*)\])?([U|Q](?:[0-9]+))?") + # order map, know the column index of ["node1","property","node2",id] + self.order_map = {} def _node_2_entity(self, node: str): ''' @@ -353,14 +355,32 @@ def entry_point(self, line_number: int, edge: str): the second element is a bool indicating whether this is a valid property edge or qualifier edge. Call corresponding downstream functions """ + edge_list = edge.strip().split("\t") l = len(edge_list) - if l != 4: - return + if line_number == 1: + # initialize the order_map + edge_list = edge.strip().split("\t") + node1_index = edge_list.index("node1") + node2_index = edge_list.index("node2") + prop_index = edge_list.index("property") + id_index = edge_list.index("id") + if not all([node1_index>-1,node2_index>-1,prop_index>-1,id_index>-1]): + raise KGTKException("Header of kgtk file misses at least one of required column names: (node1, node2, property and id)") + else: + self.order_map["node1"] = node1_index + self.order_map["node2"] = node2_index + self.order_map["prop"] = prop_index + self.order_map["id"] = id_index + return - [node1, label, node2, e_id] = edge_list - node1, label, node2, e_id = node1.strip(), label.strip(), node2.strip(), e_id.strip() - if line_number == 0: # TODO ignore header mode + # use the order_map to map the node + + node1 = edge_list[self.order_map["node1"]].strip() + node2 = edge_list[self.order_map["node2"]].strip() + prop = edge_list[self.order_map["prop"]].strip() + e_id = edge_list[self.order_map["id"]].strip() + if line_number == 2: # by default a statement edge is_qualifier_edge = False # print("#Debug Info: ",line_number, self.to_append_statement_id, e_id, is_qualifier_edge,self.to_append_statement) @@ -381,7 +401,7 @@ def entry_point(self, line_number: int, edge: str): if self.corrupted_statement_id == e_id: # Met a qualifier which associates with a corrupted statement return - if label != "type" and node1 != self.to_append_statement_id: + if prop != "type" and node1 != self.to_append_statement_id: # 1. 
not a property declaration edge and # 2. the current qualifier's node1 is not the latest property edge id, throw errors. if not self.ignore: @@ -390,25 +410,25 @@ def entry_point(self, line_number: int, edge: str): node1, line_number, self.to_append_statement_id ) ) - if label in self.label_set: - success = self.generate_label_triple(node1, label, node2) - elif label in self.description_set: - success = self.generate_description_triple(node1, label, node2) - elif label in self.alias_set: - success = self.generate_alias_triple(node1, label, node2) - elif label == "type": + if prop in self.label_set: + success = self.generate_label_triple(node1, prop, node2) + elif prop in self.description_set: + success = self.generate_description_triple(node1, prop, node2) + elif prop in self.alias_set: + success = self.generate_alias_triple(node1, prop, node2) + elif prop == "type": # special edge of prop declaration success = self.generate_prop_declaration_triple( - node1, label, node2) + node1, prop, node2) else: - if label in self.prop_types: + if prop in self.prop_types: success = self.generate_normal_triple( - node1, label, node2, is_qualifier_edge) + node1, prop, node2, is_qualifier_edge) else: if not self.ignore: raise KGTKException( "property {}'s type is unknown at line {}.\n".format( - label, line_number) + prop, line_number) ) success = False if (not success) and (not is_qualifier_edge) and (not self.ignore): From bf1b50c1a26c68b102df0ea96e500d13c067509b Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Wed, 6 May 2020 10:09:51 -0700 Subject: [PATCH 087/278] support using edge id as statement id after removing illegal characters --- kgtk/cli/generate_wikidata_triples.py | 14 +++++++++++++- kgtk/triple_generator.py | 17 +++++++++++------ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/kgtk/cli/generate_wikidata_triples.py b/kgtk/cli/generate_wikidata_triples.py index f1cc8dfdc..cdc8b44ad 100644 --- a/kgtk/cli/generate_wikidata_triples.py +++ b/kgtk/cli/generate_wikidata_triples.py @@ -112,6 +112,16 @@ def add_arguments(parser): help="if set to yes, read from compressed gz file", dest="use_gz", ) + parser.add_argument( + "-sid", + "--use-id", + action="store", + type=str2bool, + required = False, + default="no", + help="if set to yes, the id in the edge will be used as statement id when creating statement or truthy statement", + dest="use_id", + ) def run( @@ -123,6 +133,7 @@ def run( truthy: bool, ignore: bool, use_gz: bool, + use_id:bool ): # import modules locally import gzip @@ -135,7 +146,8 @@ def run( description_set=descriptions, n=n, ignore=ignore, - truthy=truthy + truthy=truthy, + use_id=use_id ) # process stdin if use_gz: diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py index 766154f7f..675ceffe9 100644 --- a/kgtk/triple_generator.py +++ b/kgtk/triple_generator.py @@ -21,7 +21,7 @@ ) BAD_CHARS = [":", "-", "&", ",", " ", - "(", ")", "\'", '\"', "/", "\\", "[", "]", ";"] + "(", ")", "\'", '\"', "/", "\\", "[", "]", ";","|"] class TripleGenerator: @@ -38,7 +38,8 @@ def __init__( ignore: bool, n: int, dest_fp: TextIO = sys.stdout, - truthy: bool = False + truthy: bool = False, + use_id:bool=False, ): from etk.wikidata.statement import Rank self.ignore = ignore @@ -65,6 +66,7 @@ def __init__( "([\+|\-]?[0-9]+\.?[0-9]*)(?:\[([\+|\-]?[0-9]+\.?[0-9]*),([\+|\-]?[0-9]+\.?[0-9]*)\])?([U|Q](?:[0-9]+))?") # order map, know the column index of ["node1","property","node2",id] self.order_map = {} + self.use_id = use_id def _node_2_entity(self, node: str): ''' @@ -198,7 
+200,9 @@ def generate_prop_declaration_triple(self, node1: str, label: str, node2: str) - return True def generate_normal_triple( - self, node1: str, label: str, node2: str, is_qualifier_edge: bool) -> bool: + self, node1: str, label: str, node2: str, is_qualifier_edge: bool,e_id:str) -> bool: + if self.use_id: + e_id = TripleGenerator.replace_illegal_string(e_id) entity = self._node_2_entity(node1) # determine the edge type edge_type = self.prop_types[label] @@ -310,9 +314,10 @@ def generate_normal_triple( self.doc.kg.add_subject(object) if self.truthy: self.to_append_statement = entity.add_truthy_statement( - label, object) + label, object,statement_id=e_id) if self.use_id else entity.add_truthy_statement(label,object) else: - self.to_append_statement = entity.add_statement(label, object) + self.to_append_statement = entity.add_statement( + label, object,statement_id=e_id) if self.use_id else entity.add_statement(label, object) self.doc.kg.add_subject(entity) return True @@ -423,7 +428,7 @@ def entry_point(self, line_number: int, edge: str): else: if prop in self.prop_types: success = self.generate_normal_triple( - node1, prop, node2, is_qualifier_edge) + node1, prop, node2, is_qualifier_edge,e_id) else: if not self.ignore: raise KGTKException( From cdf5b13d75f530b729fa0a4240eee58133aea77d Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 6 May 2020 11:01:43 -0700 Subject: [PATCH 088/278] Repair month or day zero. Escape list separators. --- kgtk/cli/validate.py | 6 +++++- kgtk/join/kgtkreader.py | 2 +- kgtk/join/kgtkvalue.py | 17 ++++++++++++----- kgtk/join/kgtkvalueoptions.py | 28 +++++++++++++++++++++++----- 4 files changed, 41 insertions(+), 12 deletions(-) diff --git a/kgtk/cli/validate.py b/kgtk/cli/validate.py index 074f56aed..ce2fce9e2 100644 --- a/kgtk/cli/validate.py +++ b/kgtk/cli/validate.py @@ -153,6 +153,8 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], allow_lax_strings: bool = False, allow_lax_lq_strings: bool = False, allow_month_or_day_zero: bool = False, + repair_month_or_day_zero: bool = False, + escape_list_separators: bool = False, minimum_valid_year: int = KgtkValueOptions.MINIMUM_VALID_YEAR, maximum_valid_year: int = KgtkValueOptions.MAXIMUM_VALID_YEAR, compression_type: typing.Optional[str] = None, @@ -175,12 +177,14 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], # Build the value parsing option structure. value_options: KgtkValueOptions = KgtkValueOptions(allow_month_or_day_zero=allow_month_or_day_zero, + repair_month_or_day_zero=repair_month_or_day_zero, allow_lax_strings=allow_lax_strings, allow_lax_lq_strings=allow_lax_lq_strings, allow_language_suffixes=allow_language_suffixes, additional_language_codes=additional_language_codes, minimum_valid_year=minimum_valid_year, - maximum_valid_year=maximum_valid_year) + maximum_valid_year=maximum_valid_year, + escape_list_separators=escape_list_separators) try: kgtk_file: typing.Optional[Path] diff --git a/kgtk/join/kgtkreader.py b/kgtk/join/kgtkreader.py index 326b905bf..9b338eb49 100644 --- a/kgtk/join/kgtkreader.py +++ b/kgtk/join/kgtkreader.py @@ -588,7 +588,7 @@ def __next__(self)-> typing.List[str]: def _ignore_invalid_values(self, values: typing.List[str], line: str)->bool: """Give a row of values, validate each value. If we find one or more - validation problems, we might want to emit erro messages and we might + validation problems, we might want to emit error messages and we might want to ignore the entire row. 
Returns True to indicate that the row should be ignored (skipped). diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index 4604f6f87..ebbf799e9 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -87,9 +87,16 @@ def get_list_items(self)->typing.List['KgtkValue']: if self.list_items is not None: return self.list_items - # Return an empty list if this is not a list. - self.list_items: typing.List['KgtkValue'] = [ ] + # Split the KGTK list. values: typing.List[str] = KgtkValue.split_list_re.split(self.value) + + # Perhaps we'd like to escape the list separators instead of splitting on them? + if self.options.escape_list_separators: + self.value = ("\\" + KgtkFormat.LIST_SEPARATOR).join(values) + return [ ] # Return an empty list. + + # Return an empty Python list if this is not a KGTK list. + self.list_items: typing.List['KgtkValue'] = [ ] if len(values) > 1: # Populate list_items with a KgtkValue for each item in the list: item_value: str @@ -742,10 +749,10 @@ def is_date_and_times(self, validate: bool=False)->bool: self.secondsstr = None self.year = None self.month = None + self.day = None self.zonestr = None self.precisionstr = None self.iso8601basic = None - self.day = None # Validate the date and times: m: typing.Optional[typing.Match] = KgtkValue.lax_date_and_times_re.match(self.value) @@ -799,7 +806,7 @@ def is_date_and_times(self, validate: bool=False)->bool: self.day = 1 self.daystr = "01" fixup_needed = True - if not self.options.allow_month_or_day_zero: + elif not self.options.allow_month_or_day_zero: return False # day 0 was disallowed. if fixup_needed: @@ -829,7 +836,7 @@ def update_date_and_times(self): if self.secondsstr is not None: if not self.iso8601basic: v += ":" - v += self.secondssr + v += self.secondsstr if self.zonestr is not None: v += self.zonestr if self.precisionstr is not None: diff --git a/kgtk/join/kgtkvalueoptions.py b/kgtk/join/kgtkvalueoptions.py index d576f649c..b061b92e3 100644 --- a/kgtk/join/kgtkvalueoptions.py +++ b/kgtk/join/kgtkvalueoptions.py @@ -16,7 +16,7 @@ class KgtkValueOptions: # Allow month 00 or day 00 in dates? This isn't really allowed by ISO # 8601, but appears in wikidata. allow_month_or_day_zero: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) - repair_month_or_day_zero: bool = attr.ib(validator=attr.validators.instance_of(bool), default=True) + repair_month_or_day_zero: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) # When allow_lax_strings is true, strings will be checked to see if they # start and end with double quote ("), but we won't check if internal @@ -38,6 +38,8 @@ class KgtkValueOptions: # iterable_validator=attr.validators.instance_of(list)))), additional_language_codes: typing.Optional[typing.List[str]] = attr.ib(default=None) + escape_list_separators: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) + # Minimum and maximum year range in dates. MINIMUM_VALID_YEAR: int = 1583 # Per ISO 8601, years before this one require special agreement. 
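# A standalone sketch (not part of this patch) contrasting the two behaviors
# controlled by escape_list_separators; the split pattern here is a
# hypothetical stand-in for KgtkValue.split_list_re:
import re

split_list_re = re.compile(r"(?<!\\)\|")       # split on unescaped "|"
value = "^1960-00-00|^1961-01-01"

print(split_list_re.split(value))              # default: a two-item KGTK list
print("\\|".join(split_list_re.split(value)))  # escaped: one literal value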
minimum_valid_year: int = attr.ib(validator=attr.validators.instance_of(int), default=MINIMUM_VALID_YEAR) @@ -81,29 +83,45 @@ def add_arguments(cls, parser: ArgumentParser): lqgroup.add_argument( "--disallow-lax-lq-strings", dest="allow_lax_lq_strings", help="Check if single quotes are backslashed inside language qualified strings.", action='store_false') - md0group= parser.add_mutually_exclusive_group() - md0group.add_argument( "--allow-month-or-day-zero", dest="allow_month_or_day_zero", + amd0group= parser.add_mutually_exclusive_group() + amd0group.add_argument( "--allow-month-or-day-zero", dest="allow_month_or_day_zero", help="Allow month or day zero in dates.", action='store_true', default=False) - md0group.add_argument( "--disallow-month-or-day-zero", dest="allow_month_or_day_zero", + amd0group.add_argument( "--disallow-month-or-day-zero", dest="allow_month_or_day_zero", help="Allow month or day zero in dates.", action='store_false') + rmd0group= parser.add_mutually_exclusive_group() + rmd0group.add_argument( "--repair-month-or-day-zero", dest="repair_month_or_day_zero", + help="Repair month or day zero in dates.", action='store_true', default=False) + + rmd0group.add_argument( "--no-repair-month-or-day-zero", dest="repair_month_or_day_zero", + help="Do not repair month or day zero in dates.", action='store_false') + parser.add_argument( "--minimum-valid-year", dest="minimum_valid_year", help="The minimum valid year in dates.", type=int, default=cls.MINIMUM_VALID_YEAR) parser.add_argument( "--maximum-valid-year", dest="maximum_valid_year", help="The maximum valid year in dates.", type=int, default=cls.MAXIMUM_VALID_YEAR) + elsgroup= parser.add_mutually_exclusive_group() + elsgroup.add_argument( "--escape-list-separators", dest="escape_list_separators", + help="Escape all list separators instead of splitting on them.", action='store_true', default=False) + + elsgroup.add_argument( "--no-escape-list-separators", dest="escape_list_separators", + help="Do not escape list separators.", action='store_false') + @classmethod # Build the value parsing option structure. def from_args(cls, args: Namespace)->'KgtkValueOptions': return cls(allow_month_or_day_zero=args.allow_month_or_day_zero, + repair_month_or_day_zero=args.repair_month_or_day_zero, allow_language_suffixes=args.allow_language_suffixes, allow_lax_strings=args.allow_lax_strings, allow_lax_lq_strings=args.allow_lax_lq_strings, additional_language_codes=args.additional_language_codes, minimum_valid_year=args.minimum_valid_year, - maximum_valid_year=args.maximum_valid_year) + maximum_valid_year=args.maximum_valid_year, + escape_list_separators=args.escape_list_separators) DEFAULT_KGTK_VALUE_OPTIONS: KgtkValueOptions = KgtkValueOptions() From fe70fd94a1196876727e1e74eca3303c29ccffb4 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 6 May 2020 11:20:32 -0700 Subject: [PATCH 089/278] Rebuild a list when repairing a child of the list. 
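Context for this patch: when a date inside a KGTK list is repaired, the list
value itself must be regenerated from its (now updated) children, or the
parent would keep serializing the stale text. A minimal standalone model of
that parent/child propagation (hypothetical classes, not the KgtkValue API):

from typing import List, Optional

class Node:
    def __init__(self, value: str, parent: Optional["ListNode"] = None):
        self.value = value
        self.parent = parent

    def repair(self, new_value: str):
        self.value = new_value
        if self.parent is not None:
            self.parent.rebuild_list()  # propagate the repair upward

class ListNode:
    def __init__(self, value: str):
        self.value = value
        self.items: List[Node] = [Node(v, parent=self) for v in value.split("|")]

    def rebuild_list(self):
        self.value = "|".join(item.value for item in self.items)

lst = ListNode("^1960-00-00|^1961-01-01")
lst.items[0].repair("^1960-01-01")
print(lst.value)  # ^1960-01-01|^1961-01-01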
--- kgtk/join/kgtkvalue.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index ebbf799e9..8ae0ff089 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -16,6 +16,7 @@ class KgtkValue(KgtkFormat): value: str = attr.ib(validator=attr.validators.instance_of(str)) options: KgtkValueOptions = attr.ib(validator=attr.validators.instance_of(KgtkValueOptions), default=DEFAULT_KGTK_VALUE_OPTIONS) + parent: typing.Optional['KgtkValue'] = attr.ib(default=None) # Cache some properties of the value that would be expensive to # continuously recompute. The class is not frozen because we have these @@ -101,7 +102,7 @@ def get_list_items(self)->typing.List['KgtkValue']: # Populate list_items with a KgtkValue for each item in the list: item_value: str for item_value in values: - self.list_items.append(KgtkValue(item_value, options=self.options)) + self.list_items.append(KgtkValue(item_value, options=self.options, parent=self)) return self.list_items def is_list(self, validate: bool = False)->bool: @@ -132,6 +133,18 @@ def is_list(self, validate: bool = False)->bool: self.valid = True return True + def rebuild_list(self): + # Called to repair a list when we've repaired a list item. + if self.list_items is None or len(self.list_items) == 0: + return + + values: typing.List[str] = [] + item: KgtkValue + for item in self.list_items: + values.append(item.value) + self.value = KgtkFormat.LIST_SEPARATOR.join(values) + + def _is_number_or_quantity(self)->bool: return self.value.startswith(("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "+", "-", ".")) @@ -810,7 +823,11 @@ def is_date_and_times(self, validate: bool=False)->bool: return False # day 0 was disallowed. if fixup_needed: + # Rapair a month or day zero problem. If this value is the child + #of a list, repair the list parent value, too. self.update_date_and_times() + if self.parent is not None: + self.parent.rebuild_list() # We are fairly certain that this is a valid date and times. self.valid = True From 111c8b160dc0263302085ef35c10e1b36f6a1590 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 6 May 2020 14:10:15 -0700 Subject: [PATCH 090/278] Document the iso8601 extended flag properly. --- kgtk/join/kgtkvalue.py | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index 8ae0ff089..b89615ee1 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -16,36 +16,39 @@ class KgtkValue(KgtkFormat): value: str = attr.ib(validator=attr.validators.instance_of(str)) options: KgtkValueOptions = attr.ib(validator=attr.validators.instance_of(KgtkValueOptions), default=DEFAULT_KGTK_VALUE_OPTIONS) + + # TODO: proper validation. parent: typing.Optional['KgtkValue'] = attr.ib(default=None) # Cache some properties of the value that would be expensive to - # continuously recompute. The class is not frozen because we have these - # cache members. + # continuously recompute. data_type: typing.Optional[KgtkFormat.DataType] = None valid: typing.Optional[bool] = None # If this is a list, cache a KgtkValue object for each item of the list. + # + # Note: Please do not access this list directly. Use get_list_items(). list_items: typing.Optional[typing.List['KgtkValue']] = None - # Offer the components of a string or language-qualified string: + # Offer the components of a string or language-qualified string, after validating the item. 
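# A standalone sketch (not part of this patch) of the access contract the
# comments above document: validate first, then read the cached fields; use
# revalidate(reclassify=True) after mutating the value, since a plain
# validate() would return the cached result (the literals are hypothetical):
from kgtk.join.kgtkvalue import KgtkValue

kv = KgtkValue("^1960-11-05")
if kv.validate():
    print(kv.year, kv.iso8601extended)  # 1960 True
kv.value = "^19601105"
if kv.revalidate(reclassify=True):
    print(kv.iso8601extended)           # False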
contents: typing.Optional[str] = None # String contents without the enclosing quotes lang: typing.Optional[str] = None suffix: typing.Optional[str] = None # Includes the leading dash. - # Offer the components of a number or quantity: + # Offer the components of a number or quantity, after validating the item. number: typing.Optional[str] = None # Note: not converted to int or float low_tolerance: typing.Optional[str] = None # Note: not converted to int or float high_tolerance: typing.Optional[str] = None # Note: not converted to int or float si_units: typing.Optional[str] = None wikidata_node: typing.Optional[str] = None - # Offer the components of a location coordinates: + # Offer the components of a location coordinates, after validaating the item: latstr: typing.Optional[str] = None lat: typing.Optional[float] = None lonstr: typing.Optional[str] = None lon: typing.Optional[float] = None - # Offer the components of a date and times: + # Offer the components of a date and times, after validating the item: yearstr: typing.Optional[str] = None # Note: not converted to int year: typing.Optional[int] = None monthstr: typing.Optional[str] = None # Note: not converted to int @@ -57,7 +60,7 @@ class KgtkValue(KgtkFormat): secondsstr: typing.Optional[str] = None # Note: not converted to int or float zonestr: typing.Optional[str] = None precisionstr: typing.Optional[str] = None - iso8601basic: typing.Optional[bool] = None # True when hyphens/colons present. + iso8601extended: typing.Optional[bool] = None # True when hyphens/colons present. def is_valid(self)->bool: # Is this a valid whatever it is? @@ -85,6 +88,8 @@ def is_empty(self, validate: bool = False)->bool: def get_list_items(self)->typing.List['KgtkValue']: # If this is a KGTK List, return a list of KGTK values representing # the items in the list. If this is not a KGTK List, return an empty list. + # + # Note: This is the only routine that should touch self.list_items. if self.list_items is not None: return self.list_items @@ -135,12 +140,13 @@ def is_list(self, validate: bool = False)->bool: def rebuild_list(self): # Called to repair a list when we've repaired a list item. - if self.list_items is None or len(self.list_items) == 0: + list_items: typng.List[KgtkValues] = self.get_list_items() + if list_items is None or len(list_items) == 0: return values: typing.List[str] = [] item: KgtkValue - for item in self.list_items: + for item in list_items: values.append(item.value) self.value = KgtkFormat.LIST_SEPARATOR.join(values) @@ -727,7 +733,7 @@ def is_date_and_times(self, validate: bool=False)->bool: self.day = None self.zonestr = None self.precisionstr = None - self.iso8601basic = None + self.iso8601extended = None return False # We are certain that this is location coordinates, although we haven't checked validity. 
self.data_type = KgtkFormat.DataType.DATE_AND_TIMES @@ -745,7 +751,7 @@ def is_date_and_times(self, validate: bool=False)->bool: self.day = None self.zonestr = None self.precisionstr = None - self.iso8601basic = None + self.iso8601extended = None return False if not validate: @@ -765,7 +771,7 @@ def is_date_and_times(self, validate: bool=False)->bool: self.day = None self.zonestr = None self.precisionstr = None - self.iso8601basic = None + self.iso8601extended = None # Validate the date and times: m: typing.Optional[typing.Match] = KgtkValue.lax_date_and_times_re.match(self.value) @@ -780,7 +786,7 @@ def is_date_and_times(self, validate: bool=False)->bool: self.secondsstr = m.group("seconds") self.zonestr = m.group("zone") self.precisionstr = m.group("precision") - self.iso8601basic = m.group("hyphen") is None + self.iso8601extended = m.group("hyphen") is not None fixup_needed: bool = False @@ -836,22 +842,22 @@ def is_date_and_times(self, validate: bool=False)->bool: def update_date_and_times(self): v: str = "^" + self.yearstr if self.monthstr is not None: - if not self.iso8601basic: + if self.iso8601extended: v += "-" v += self.monthstr if self.daystr is not None: - if not self.iso8601basic: + if self.iso8601extended: v += "-" v += self.daystr if self.hourstr is not None: v += "T" v += self.hourstr if self.minutesstr is not None: - if not self.iso8601basic: + if self.iso8601extended: v += ":" v += self.minutesstr if self.secondsstr is not None: - if not self.iso8601basic: + if self.iso8601extended: v += ":" v += self.secondsstr if self.zonestr is not None: From 9572db3af4fa5c675d1114187fe73951c87b064a Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 6 May 2020 14:54:54 -0700 Subject: [PATCH 091/278] More field extractions. --- kgtk/join/kgtkvalue.py | 163 +++++++++++++++++++++++++++++++---------- 1 file changed, 123 insertions(+), 40 deletions(-) diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py index b89615ee1..9bdb3331f 100644 --- a/kgtk/join/kgtkvalue.py +++ b/kgtk/join/kgtkvalue.py @@ -30,15 +30,25 @@ class KgtkValue(KgtkFormat): # Note: Please do not access this list directly. Use get_list_items(). list_items: typing.Optional[typing.List['KgtkValue']] = None + # The following members offer access to the fields of a KgtkValue. + # They are accessible immediately after validating the contents + # of the KgtkValue object: + # obj.is_valid() return True + # obj.validate() returns True + # obj.revalidate() returns True + # obj.is_language_qualified_string(validate=True) returns True + #... etc. + # Offer the components of a string or language-qualified string, after validating the item. contents: typing.Optional[str] = None # String contents without the enclosing quotes - lang: typing.Optional[str] = None - suffix: typing.Optional[str] = None # Includes the leading dash. + lang: typing.Optional[str] = None # 2- or 3-character code without suffix. + suffix: typing.Optional[str] = None # Language code suffix, including the leading dash. # Offer the components of a number or quantity, after validating the item. 
-    number: typing.Optional[str] = None # Note: not converted to int or float
-    low_tolerance: typing.Optional[str] = None # Note: not converted to int or float
-    high_tolerance: typing.Optional[str] = None # Note: not converted to int or float
+    numberstr: typing.Optional[str] = None # Note: not converted to int or float
+    number: typing.Optional[typing.Union[int, float]] = None
+    low_tolerancestr: typing.Optional[str] = None # Note: not converted to int or float
+    high_tolerancestr: typing.Optional[str] = None # Note: not converted to int or float
     si_units: typing.Optional[str] = None
     wikidata_node: typing.Optional[str] = None
 
@@ -49,18 +59,24 @@ class KgtkValue(KgtkFormat):
     lon: typing.Optional[float] = None
 
     # Offer the components of a date and times, after validating the item:
-    yearstr: typing.Optional[str] = None # Note: not converted to int
+    yearstr: typing.Optional[str] = None # Note: before conversion to int
     year: typing.Optional[int] = None
-    monthstr: typing.Optional[str] = None # Note: not converted to int
+    monthstr: typing.Optional[str] = None # Note: before conversion to int
     month: typing.Optional[int] = None
-    daystr: typing.Optional[str] = None # Note: not converted to int
+    daystr: typing.Optional[str] = None # Note: before conversion to int
     day: typing.Optional[int] = None
-    hourstr: typing.Optional[str] = None # Note: not converted to int or float
-    minutesstr: typing.Optional[str] = None # Note: not converted to int or float
-    secondsstr: typing.Optional[str] = None # Note: not converted to int or float
-    zonestr: typing.Optional[str] = None
+    hourstr: typing.Optional[str] = None # Note: before conversion to int or float
+    hour: typing.Optional[int] = None
+    minutesstr: typing.Optional[str] = None # Note: before conversion to int or float
+    minutes: typing.Optional[int] = None
+    secondsstr: typing.Optional[str] = None # Note: before conversion to int or float
+    seconds: typing.Optional[int] = None
+    zonestr: typing.Optional[str] = None # Z or [-+]HH or [-+]HHMM or [-+]HH:MM
     precisionstr: typing.Optional[str] = None
-    iso8601extended: typing.Optional[bool] = None # True when hyphens/colons present.
+    iso8601extended: typing.Optional[bool] = None # True when hyphens/colons are present.
+
+    # Offer the contents of a boolean, after validating the item:
+    truth: typing.Optional[bool] = None
 
     def is_valid(self)->bool:
         # Is this a valid whatever it is?
@@ -179,9 +195,9 @@ def _is_number_or_quantity(self)->bool:
                                                                            long_suffix=long_suffix_pat)
 
         integer_pat: str = r'(?:{decinteger}|{bininteger}|{octinteger}|{hexinteger})'.format(decinteger=decinteger_pat,
-                                                                                             bininteger=bininteger_pat,
-                                                                                             octinteger=octinteger_pat,
-                                                                                             hexinteger=hexinteger_pat)
+                                                                                            bininteger=bininteger_pat,
+                                                                                            octinteger=octinteger_pat,
+                                                                                            hexinteger=hexinteger_pat)
 
         # Floating point literals.
         digitpart_pat: str = r'(?:{digit}(?:_?{digit})*)'.format(digit=digit_pat)
@@ -256,17 +272,17 @@ def is_number_or_quantity(self, validate: bool=False)->bool:
                 return self.is_quantity(validate=validate)
             else:
                 # Clear the number or quantity components:
-                self.number = None
-                self.low_tolerance = None
-                self.high_tolerance = None
+                self.numberstr = None
+                self.low_tolerancestr = None
+                self.high_tolerancestr = None
                 self.si_units = None
                 self.wikidata_node = None
                 return False # Not a number or quantity.
# Clear the number or quantity components: - self.number = None - self.low_tolerance = None - self.high_tolerance = None + self.numberstr = None + self.low_tolerancestr = None + self.high_tolerancestr = None self.si_units = None self.wikidata_node = None @@ -284,13 +300,24 @@ def is_number_or_quantity(self, validate: bool=False)->bool: return False # Extract the number or quantity components: - self.number = m.group("number") - self.low_tolerance = m.group("low_tolerance") - self.high_tolerance = m.group("high_tolerance") + self.numberstr = m.group("number") + self.low_tolerancestr = m.group("low_tolerance") + self.high_tolerancestr = m.group("high_tolerance") self.si_units = m.group("si_units") self.wikidata_node = m.group("wikidata_node") - if self.low_tolerance is not None or self.high_tolerance is not None or self.si_units is not None or self.wikidata_node is not None: + # For convenience, convert the numeric part to int or float: + # + # TODO: go to this extra work only when requested? + if self.numberstr is None: + raise ValueError("Missing numeric part") + n: str = self.numberstr.lower() + if "." in n or ("e" in n and not n.startswith("0x")): + self.number = float(n) + else: + self.number = int(n) + + if self.low_tolerancestr is not None or self.high_tolerancestr is not None or self.si_units is not None or self.wikidata_node is not None: # We can be certain that this is a quantity. self.data_type = KgtkFormat.DataType.QUANTITY else: @@ -322,7 +349,7 @@ def is_number(self, validate: bool=False)->bool: if self.data_type is not None: if self.data_type != KgtkFormat.DataType.NUMBER: # Clear the number components: - self.number = None + self.numberstr = None return False if not validate: @@ -331,7 +358,7 @@ def is_number(self, validate: bool=False)->bool: return self.valid # Clear the number components: - self.number = None + self.numberstr = None if not self._is_number_or_quantity(): return False @@ -342,7 +369,18 @@ def is_number(self, validate: bool=False)->bool: return False # Extract the number components: - self.number = m.group("number") + self.numberstr = m.group("number") + + # For convenience, convert the numeric part to int or float: + # + # TODO: go to this extra work only when requested? + if self.numberstr is None: + raise ValueError("Missing numeric part") + n: str = self.numberstr.lower() + if "." in n or ("e" in n and not n.startswith("0x")): + self.number = float(n) + else: + self.number = int(n) # Now we can be certain that this is a number. 
self.data_type = KgtkFormat.DataType.NUMBER @@ -358,9 +396,9 @@ def is_quantity(self, validate: bool=False)->bool: if self.data_type is not None: if self.data_type != KgtkFormat.DataType.QUANTITY: # Clear the quantity components: - self.number = None - self.low_tolerance = None - self.high_tolerance = None + self.numberstr = None + self.low_tolerancestr = None + self.high_tolerancestr = None self.si_units = None self.wikidata_node = None return False @@ -371,9 +409,9 @@ def is_quantity(self, validate: bool=False)->bool: return self.valid # Clear the quantity components: - self.number = None - self.low_tolerance = None - self.high_tolerance = None + self.numberstr = None + self.low_tolerancestr = None + self.high_tolerancestr = None self.si_units = None self.wikidata_node = None @@ -386,13 +424,24 @@ def is_quantity(self, validate: bool=False)->bool: return False # Extract the quantity components: - self.number = m.group("number") - self.low_tolerance = m.group("low_tolerance") - self.high_tolerance = m.group("high_tolerance") + self.numberstr = m.group("number") + self.low_tolerancestr = m.group("low_tolerance") + self.high_tolerancestr = m.group("high_tolerance") self.si_units = m.group("si_units") self.wikidata_node = m.group("wikidata_node") - if self.low_tolerance is None and self.high_tolerance is None and self.si_units is None and self.wikidata_node is None: + # For convenience, convert the numeric part to int or float: + # + # TODO: go to this extra work only when requested? + if self.numberstr is None: + raise ValueError("Missing numeric part") + n: str = self.numberstr.lower() + if "." in n or ("e" in n and not n.startswith("0x")): + self.number = float(n) + else: + self.number = int(n) + + if self.low_tolerancestr is None and self.high_tolerancestr is None and self.si_units is None and self.wikidata_node is None: # This is a number, not a quantity self.data_type = KgtkFormat.DataType.NUMBER self.valid = True @@ -483,15 +532,21 @@ def is_boolean(self, validate: bool = False)->bool: The validate parameter is ignored. """ if self.data_type is not None: - return self.data_type == KgtkFormat.DataType.BOOLEAN + if self.data_type != KgtkFormat.DataType.BOOLEAN: + self.truth = None + return False + self.truth = self.value == KgtkFormat.TRUE_SYMBOL + return True # Is this a boolean? if self.value != KgtkFormat.TRUE_SYMBOL and self.value != KgtkFormat.FALSE_SYMBOL: + self.truth = None return False # We are certain this is a valid boolean. self.data_type = KgtkFormat.DataType.BOOLEAN self.valid = True + self.truth = self.value == KgtkFormat.TRUE_SYMBOL return True # Support two or three character language codes. 
Supports hyphenated codes
@@ -731,6 +786,9 @@ def is_date_and_times(self, validate: bool=False)->bool:
             self.year = None
             self.month = None
             self.day = None
+            self.hour = None
+            self.minutes = None
+            self.seconds = None
             self.zonestr = None
             self.precisionstr = None
             self.iso8601extended = None
@@ -749,6 +807,9 @@ def is_date_and_times(self, validate: bool=False)->bool:
             self.year = None
             self.month = None
             self.day = None
+            self.hour = None
+            self.minutes = None
+            self.seconds = None
             self.zonestr = None
             self.precisionstr = None
             self.iso8601extended = None
@@ -769,6 +830,9 @@ def is_date_and_times(self, validate: bool=False)->bool:
         self.year = None
         self.month = None
         self.day = None
+        self.hour = None
+        self.minutes = None
+        self.seconds = None
         self.zonestr = None
         self.precisionstr = None
         self.iso8601extended = None
@@ -828,6 +892,25 @@ def is_date_and_times(self, validate: bool=False)->bool:
             elif not self.options.allow_month_or_day_zero:
                 return False # day 0 was disallowed.
 
+        # Convert the time fields to ints:
+        if self.hourstr is not None:
+            try:
+                self.hour: int = int(self.hourstr)
+            except ValueError:
+                return False # shouldn't happen
+
+        if self.minutesstr is not None:
+            try:
+                self.minutes: int = int(self.minutesstr)
+            except ValueError:
+                return False # shouldn't happen
+
+        if self.secondsstr is not None:
+            try:
+                self.seconds: int = int(self.secondsstr)
+            except ValueError:
+                return False # shouldn't happen
+
         if fixup_needed:
             # Repair a month or day zero problem.  If this value is the child
             # of a list, repair the list parent value, too.

From ec21ee1f1fa3fce502c076ea06687e3641afc440 Mon Sep 17 00:00:00 2001
From: Craig Milo Rogers
Date: Wed, 6 May 2020 15:27:37 -0700
Subject: [PATCH 092/278] Provide a map of parsed fields. Add more debugging.

---
 kgtk/join/kgtkvalue.py | 89 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 85 insertions(+), 4 deletions(-)

diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py
index 9bdb3331f..583767f3c 100644
--- a/kgtk/join/kgtkvalue.py
+++ b/kgtk/join/kgtkvalue.py
@@ -38,6 +38,9 @@ class KgtkValue(KgtkFormat):
     #    obj.revalidate() returns True
     #    obj.is_language_qualified_string(validate=True) returns True
     #... etc.
+    #
+    # The fields may be accessed directly, or they may be obtained in
+    # a map via obj.get_fields()
 
     # Offer the components of a string or language-qualified string, after validating the item.
     contents: typing.Optional[str] = None # String contents without the enclosing quotes
@@ -1098,6 +1101,73 @@ def describe(self)->str:
             return "Symbol" if self.is_symbol(validate=True) else "Invalid Symbol"
         else:
             return "Unknown"
+
+    def get_fields(self)->typing.Mapping[str, typing.Union[str, int, float, bool]]:
+        results: typing.MutableMapping[str, typing.Union[str, int, float, bool]] = { }
+        if self.data_type is not None:
+            results["data_type"] = str(self.data_type)
+        if self.valid is not None:
+            results["valid"] = self.valid
+        if self.contents is not None:
+            results["contents"] = self.contents
+        if self.lang is not None:
+            results["lang"] = self.lang
+        if self.suffix is not None:
+            results["suffix"] = self.suffix
+        if self.numberstr is not None:
+            results["numberstr"] = self.numberstr
+        if self.number is not None:
+            results["number"] = self.number
+        if self.low_tolerancestr is not None:
+            results["low_tolerancestr"] = self.low_tolerancestr
+        if self.high_tolerancestr is not None:
+            results["high_tolerancestr"] = self.high_tolerancestr
+        if self.si_units is not None:
+            results["si_units"] = self.si_units
+        if self.wikidata_node is not None:
+            results["wikidata_node"] = self.wikidata_node
+        if self.latstr is not None:
+            results["latstr"] = self.latstr
+        if self.lat is not None:
+            results["lat"] = self.lat
+        if self.lonstr is not None:
+            results["lonstr"] = self.lonstr
+        if self.lon is not None:
+            results["lon"] = self.lon
+        if self.yearstr is not None:
+            results["yearstr"] = self.yearstr
+        if self.year is not None:
+            results["year"] = self.year
+        if self.monthstr is not None:
+            results["monthstr"] = self.monthstr
+        if self.month is not None:
+            results["month"] = self.month
+        if self.daystr is not None:
+            results["daystr"] = self.daystr
+        if self.day is not None:
+            results["day"] = self.day
+        if self.hourstr is not None:
+            results["hourstr"] = self.hourstr
+        if self.hour is not None:
+            results["hour"] = self.hour
+        if self.minutesstr is not None:
+            results["minutesstr"] = self.minutesstr
+        if self.minutes is not None:
+            results["minutes"] = self.minutes
+        if self.secondsstr is not None:
+            results["secondsstr"] = self.secondsstr
+        if self.seconds is not None:
+            results["seconds"] = self.seconds
+        if self.zonestr is not None:
+            results["zonestr"] = self.zonestr
+        if self.precisionstr is not None:
+            results["precisionstr"] = self.precisionstr
+        if self.iso8601extended is not None:
+            results["iso8601extended"] = self.iso8601extended
+        list_items: typing.List[KgtkValue] = self.get_list_items()
+        if len(list_items) > 0:
+            results["list_len"] = len(list_items)
+        return results
 
 def main():
     """
@@ -1117,12 +1187,23 @@ def main():
     for value in args.values:
         kv: KgtkValue = KgtkValue(value, options=value_options)
         kv.validate()
-        nv: str = kv.value
-        if value == nv:
+        if value == kv.value:
             print("%s: %s" % (value, kv.describe()), flush=True)
         else:
-            print("%s => %s: %s" % (value, nv, kv.describe()), flush=True)
-
+            print("%s => %s: %s" % (value, kv.value, kv.describe()), flush=True)
+
+        if args.verbose:
+            fields = kv.get_fields()
+            for key in sorted(fields.keys()):
+                print("%s: %s" % (key, str(fields[key])))
+            list_items: typing.List[KgtkValue] = kv.get_list_items()
+            item: KgtkValue
+            for item in list_items:
+                print("...")
+                fields = item.get_fields()
+                for key in sorted(fields.keys()):
+                    print("... %s: %s" % (key, str(fields[key])))
+
 
 if __name__ == "__main__":
     main()

From 49be20069a5ed571f20868b777efbad700cf2766 Mon Sep 17 00:00:00 2001
From: Craig Milo Rogers
Date: Wed, 6 May 2020 15:29:56 -0700
Subject: [PATCH 093/278] Better documentation.

---
 kgtk/join/kgtkvalue.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py
index 583767f3c..415e5be03 100644
--- a/kgtk/join/kgtkvalue.py
+++ b/kgtk/join/kgtkvalue.py
@@ -30,17 +30,18 @@ class KgtkValue(KgtkFormat):
     # Note: Please do not access this list directly. Use get_list_items().
     list_items: typing.Optional[typing.List['KgtkValue']] = None
 
-    # The following members offer access to the fields of a KgtkValue.
-    # They are accessible immediately after validating the
-    # contents of the KgtkValue object:
+    # The following members offer access to the components (fields) of a
+    # KgtkValue.  They are accessible immediately after validating the
+    # contents of the KgtkValue object:
+    #
     #    obj.is_valid() return True
     #    obj.validate() returns True
     #    obj.revalidate() returns True
     #    obj.is_language_qualified_string(validate=True) returns True
     #... etc.
     #
-    # The fields may be accessed directly, or they may be obtained in
-    # a map via obj.get_fields()
+    # The fields may be accessed directly from this object or they may be
+    # obtained as a map via obj.get_fields()
 
     # Offer the components of a string or language-qualified string, after validating the item.

From 7a79093bc5b6a14d4699228a59400919b05bef65 Mon Sep 17 00:00:00 2001
From: Craig Milo Rogers
Date: Wed, 6 May 2020 15:39:38 -0700
Subject: [PATCH 094/278] Document node or edge file.

---
 kgtk/join/kgtkreader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kgtk/join/kgtkreader.py b/kgtk/join/kgtkreader.py
index 9b338eb49..2c64ef9d6 100644
--- a/kgtk/join/kgtkreader.py
+++ b/kgtk/join/kgtkreader.py
@@ -1,5 +1,5 @@
 """
-Read a KGTK edge file in TSV format.
+Read a KGTK node or edge file in TSV format.
 
 TODO: Add support for alternative envelope formats, such as JSON.
 """

From f174a1728b86f5630b8306efec3c82f64e6639eb Mon Sep 17 00:00:00 2001
From: Craig Milo Rogers
Date: Wed, 6 May 2020 16:28:49 -0700
Subject: [PATCH 095/278] Change how value_options are passed. Pass
 value_options to the edge joiner. Fix bug in lang_suffix parsing.
--- kgtk/join/edgejoiner.py | 20 ++++++++++++++++++-- kgtk/join/edgereader.py | 4 ++-- kgtk/join/kgtkreader.py | 7 ++++--- kgtk/join/kgtkvalue.py | 2 +- kgtk/join/nodereader.py | 4 ++-- 5 files changed, 27 insertions(+), 10 deletions(-) diff --git a/kgtk/join/edgejoiner.py b/kgtk/join/edgejoiner.py index 83d592a61..dc93907b4 100644 --- a/kgtk/join/edgejoiner.py +++ b/kgtk/join/edgejoiner.py @@ -18,6 +18,7 @@ from kgtk.join.edgereader import EdgeReader from kgtk.join.kgtkformat import KgtkFormat from kgtk.join.kgtkwriter import KgtkWriter +from kgtk.join.kgtkvalueoptions import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS from kgtk.join.validationaction import ValidationAction @attr.s(slots=True, frozen=True) @@ -51,6 +52,10 @@ class EdgeJoiner(KgtkFormat): fill_short_lines: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) truncate_long_lines: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) + # TODO: find a working validator + # value_options: typing.Optional[KgtkValueOptions] = attr.ib(attr.validators.optional(attr.validators.instance_of(KgtkValueOptions)), default=None) + value_options: typing.Optional[KgtkValueOptions] = attr.ib(default=None) + gzip_in_parallel: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) verbose: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) @@ -94,6 +99,7 @@ def extract_join_key_set(self, file_path: Path, who: str)->typing.Set[str]: long_line_action=self.long_line_action, fill_short_lines=self.fill_short_lines, truncate_long_lines=self.truncate_long_lines, + value_options = self.value_options, gzip_in_parallel=self.gzip_in_parallel, verbose=self.verbose, very_verbose=self.very_verbose) @@ -181,13 +187,16 @@ def process(self): short_line_action=self.short_line_action, long_line_action=self.long_line_action, fill_short_lines=self.fill_short_lines, - truncate_long_lines=self.truncate_long_lines) + truncate_long_lines=self.truncate_long_lines, + value_options = self.value_options) + right_kr: EdgeReader = EdgeReader.open_edge_file(self.right_file_path, short_line_action=self.short_line_action, long_line_action=self.long_line_action, fill_short_lines=self.fill_short_lines, - truncate_long_lines=self.truncate_long_lines) + truncate_long_lines=self.truncate_long_lines, + value_options = self.value_options) # Map the right column names for the join: joined_column_names: typing.List[str] @@ -256,8 +265,14 @@ def main(): help="Remove excess trailing columns in long lines.", action='store_true') parser.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true') parser.add_argument( "--very-verbose", dest="very_verbose", help="Print additional progress messages.", action='store_true') + + KgtkValueOptions.add_arguments(parser) + args = parser.parse_args() + # Build the value parsing option structure. 
+ value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) + ej: EdgeJoiner = EdgeJoiner(left_file_path=args.left_file_path, right_file_path=args.right_file_path, output_path=args.output_file_path, @@ -271,6 +286,7 @@ def main(): long_line_action=args.long_line_action, fill_short_lines=args.fill_short_lines, truncate_long_lines=args.truncate_long_lines, + value_options=value_options, gzip_in_parallel=args.gzip_in_parallel, verbose=args.verbose, very_verbose=args.very_verbose) diff --git a/kgtk/join/edgereader.py b/kgtk/join/edgereader.py index 0d687988e..d4d343148 100644 --- a/kgtk/join/edgereader.py +++ b/kgtk/join/edgereader.py @@ -13,7 +13,7 @@ from kgtk.join.closableiter import ClosableIter from kgtk.join.enumnameaction import EnumNameAction from kgtk.join.kgtkreader import KgtkReader -from kgtk.join.kgtkvalueoptions import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS +from kgtk.join.kgtkvalueoptions import KgtkValueOptions from kgtk.join.validationaction import ValidationAction @attr.s(slots=True, frozen=False) @@ -38,7 +38,7 @@ def open_edge_file(cls, invalid_value_action: ValidationAction = ValidationAction.REPORT, header_error_action: ValidationAction = ValidationAction.EXIT, unsafe_column_name_action: ValidationAction = ValidationAction.REPORT, - value_options: KgtkValueOptions = DEFAULT_KGTK_VALUE_OPTIONS, + value_options: typing.Optional[KgtkValueOptions] = None, compression_type: typing.Optional[str] = None, gzip_in_parallel: bool = False, gzip_queue_size: int = KgtkReader.GZIP_QUEUE_SIZE_DEFAULT, diff --git a/kgtk/join/kgtkreader.py b/kgtk/join/kgtkreader.py index 2c64ef9d6..6b73c8f0e 100644 --- a/kgtk/join/kgtkreader.py +++ b/kgtk/join/kgtkreader.py @@ -84,7 +84,7 @@ class KgtkReader(KgtkBase, ClosableIter[typing.List[str]]): # Validate data cell values? invalid_value_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.REPORT) - value_options: KgtkValueOptions = attr.ib(validator=attr.validators.instance_of(KgtkValueOptions), default=DEFAULT_KGTK_VALUE_OPTIONS) + value_options: typing.Optional[KgtkValueOptions] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(KgtkValueOptions)), default=None) # Repair records with too many or too few fields? fill_short_lines: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) @@ -132,7 +132,7 @@ def open(cls, invalid_value_action: ValidationAction = ValidationAction.REPORT, header_error_action: ValidationAction = ValidationAction.EXIT, unsafe_column_name_action: ValidationAction = ValidationAction.REPORT, - value_options: KgtkValueOptions = DEFAULT_KGTK_VALUE_OPTIONS, + value_options: typing.Optional[KgtkValueOptions] = None, compression_type: typing.Optional[str] = None, gzip_in_parallel: bool = False, gzip_queue_size: int = GZIP_QUEUE_SIZE_DEFAULT, @@ -594,12 +594,13 @@ def _ignore_invalid_values(self, values: typing.List[str], line: str)->bool: Returns True to indicate that the row should be ignored (skipped). """ + options: KgtkValueOptions = self.value_options if self.value_options is not None else DEFAULT_KGTK_VALUE_OPTIONS problems: typing.List[str] = [ ] # Build a list of problems. idx: int value: str for idx, value in enumerate(values): if len(value) > 0: # Optimize the common case of empty columns. 
-            kv: KgtkValue = KgtkValue(value, options=self.value_options)
+            kv: KgtkValue = KgtkValue(value, options=options)
                 if not kv.is_valid():
                     problems.append("col %d (%s) value '%s' is an %s" % (idx, self.column_names[idx], value, kv.describe()))
 
diff --git a/kgtk/join/kgtkvalue.py b/kgtk/join/kgtkvalue.py
index 415e5be03..9bec2605e 100644
--- a/kgtk/join/kgtkvalue.py
+++ b/kgtk/join/kgtkvalue.py
@@ -555,7 +555,7 @@ def is_boolean(self, validate: bool = False)->bool:
     # Support two or three character language codes.  Supports hyphenated codes
     # with a country code or dialect name suffix after the language code.
-    lax_language_qualified_string_re: typing.Pattern = re.compile(r"^'(?P<contents>.*)'@(?P<lang>[a-zA-Z]{2,3}(?P<suffix>-[a-zA-Z]+)?)$")
+    lax_language_qualified_string_re: typing.Pattern = re.compile(r"^'(?P<contents>.*)'@(?P<lang_suffix>(?P<lang>[a-zA-Z]{2,3})(?P<suffix>-[a-zA-Z]+)?)$")
     strict_language_qualified_string_re: typing.Pattern = re.compile(r"^'(?P<contents>(?:[^'\\]|\\.)*)'@(?P<lang_suffix>(?P<lang>[a-zA-Z]{2,3})(?P<suffix>-[a-zA-Z]+)?)$")
 
     def is_language_qualified_string(self, validate: bool=False)->bool:
 
diff --git a/kgtk/join/nodereader.py b/kgtk/join/nodereader.py
index 0f83d8b8a..4f4189a6f 100644
--- a/kgtk/join/nodereader.py
+++ b/kgtk/join/nodereader.py
@@ -13,7 +13,7 @@
 from kgtk.join.closableiter import ClosableIter
 from kgtk.join.enumnameaction import EnumNameAction
 from kgtk.join.kgtkreader import KgtkReader
-from kgtk.join.kgtkvalueoptions import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS
+from kgtk.join.kgtkvalueoptions import KgtkValueOptions
 from kgtk.join.validationaction import ValidationAction
 
 @attr.s(slots=True, frozen=False)
@@ -37,7 +37,7 @@ def open_node_file(cls,
                        invalid_value_action: ValidationAction = ValidationAction.REPORT,
                        header_error_action: ValidationAction = ValidationAction.EXIT,
                        unsafe_column_name_action: ValidationAction = ValidationAction.REPORT,
-                       value_options: KgtkValueOptions = DEFAULT_KGTK_VALUE_OPTIONS,
+                       value_options: typing.Optional[KgtkValueOptions] = None,
                        compression_type: typing.Optional[str] = None,
                        gzip_in_parallel: bool = False,
                        gzip_queue_size: int = KgtkReader.GZIP_QUEUE_SIZE_DEFAULT,

From b969c6625b82761d1aacc9b1063eed17ea2751a8 Mon Sep 17 00:00:00 2001
From: Craig Milo Rogers
Date: Wed, 6 May 2020 17:37:55 -0700
Subject: [PATCH 096/278] Give better verbose feedback. Optimize the join key
 processing. Accept an error limit.
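
The key-set optimization boils down to not building key sets that can never
filter anything. A rough sketch of the dispatch (simplified; the real logic is
in EdgeJoiner.join_key_sets below):

    import typing

    def join_keys(left: typing.Callable[[], typing.Set[str]],
                  right: typing.Callable[[], typing.Set[str]],
                  left_join: bool, right_join: bool) -> typing.Optional[typing.Set[str]]:
        if left_join and right_join:
            return None                      # outer join: keep every row, skip both scans
        if left_join:
            return left()                    # left join: only the left keys matter
        if right_join:
            return right()                   # right join: only the right keys matter
        return left().intersection(right())  # inner join: keys must appear on both sides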
--- kgtk/join/edgejoiner.py | 94 ++++++++++++++++++++++++++++++++--------- 1 file changed, 75 insertions(+), 19 deletions(-) diff --git a/kgtk/join/edgejoiner.py b/kgtk/join/edgejoiner.py index dc93907b4..2a7044283 100644 --- a/kgtk/join/edgejoiner.py +++ b/kgtk/join/edgejoiner.py @@ -58,6 +58,8 @@ class EdgeJoiner(KgtkFormat): gzip_in_parallel: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) + error_limit: int = attr.ib(validator=attr.validators.instance_of(int), default=EdgeReader.ERROR_LIMIT_DEFAULT) + verbose: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) very_verbose: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) @@ -94,6 +96,8 @@ def single_column_key_set(self, kr: EdgeReader, join_idx: int)->typing.Set[str]: return result def extract_join_key_set(self, file_path: Path, who: str)->typing.Set[str]: + if self.verbose: + print("Extracting the %s join key set" % who) kr: EdgeReader = EdgeReader.open_edge_file(file_path, short_line_action=self.short_line_action, long_line_action=self.long_line_action, @@ -101,6 +105,7 @@ def extract_join_key_set(self, file_path: Path, who: str)->typing.Set[str]: truncate_long_lines=self.truncate_long_lines, value_options = self.value_options, gzip_in_parallel=self.gzip_in_parallel, + error_limit=self.error_limit, verbose=self.verbose, very_verbose=self.very_verbose) @@ -123,24 +128,30 @@ def extract_join_key_set(self, file_path: Path, who: str)->typing.Set[str]: return self.single_column_key_set(kr, join_idx) # closes er file - def join_key_sets(self)->typing.Set[str]: + def join_key_sets(self)->typing.Optional[typing.Set[str]]: """ Read the input edge files the first time, building the sets of left and right join values. """ - left_join_key_set: typing.Set[str] = self.extract_join_key_set(self.left_file_path, "left") - right_join_key_set: typing.Set[str] = self.extract_join_key_set(self.right_file_path, "right") - - joined_key_set: typing.Set[str] if self.left_join and self.right_join: - # TODO: This joins everything! We can shortut computing these sets. 
- joined_key_set = left_join_key_set.union(right_join_key_set) + if self.verbose: + print("Outer join, no need to compute join keys.") + return None elif self.left_join and not self.right_join: - joined_key_set = left_join_key_set.copy() + if self.verbose: + print("Computing the left join key set") + return self.extract_join_key_set(self.left_file_path, "left").copy() + elif self.right_join and not self.left_join: - joined_key_set = right_join_key_set.copy() + if self.verbose: + print("Computing the right join key set") + return self.extract_join_key_set(self.right_file_path, "right").copy() + else: - joined_key_set = left_join_key_set.intersection(right_join_key_set) - return joined_key_set + if self.verbose: + print("Computing the inner join key set") + left_join_key_set: typing.Set[str] = self.extract_join_key_set(self.left_file_path, "left") + right_join_key_set: typing.Set[str] = self.extract_join_key_set(self.right_file_path, "right") + return left_join_key_set.intersection(right_join_key_set) def merge_columns(self, left_kr: EdgeReader, right_kr: EdgeReader)->typing.Tuple[typing.List[str], typing.List[str]]: joined_column_names: typing.List[str] = [ ] @@ -180,25 +191,33 @@ def merge_columns(self, left_kr: EdgeReader, right_kr: EdgeReader)->typing.Tuple return (joined_column_names, right_column_names) def process(self): - joined_key_set: typing.Set[str] = self.join_key_sets() + joined_key_set: typing.Optional[typing.Set[str]] = self.join_key_sets() + if self.verbose: + print("Opening the left edge file: %s" % str(self.left_file_path)) # Open the input files for the second time. This won't work with stdin. left_kr: EdgeReader = EdgeReader.open_edge_file(self.left_file_path, short_line_action=self.short_line_action, long_line_action=self.long_line_action, fill_short_lines=self.fill_short_lines, truncate_long_lines=self.truncate_long_lines, - value_options = self.value_options) + value_options = self.value_options, + error_limit=self.error_limit) + if self.verbose: + print("Opening the right edge file: %s" % str(self.right_file_path)) right_kr: EdgeReader = EdgeReader.open_edge_file(self.right_file_path, short_line_action=self.short_line_action, long_line_action=self.long_line_action, fill_short_lines=self.fill_short_lines, truncate_long_lines=self.truncate_long_lines, - value_options = self.value_options) + value_options = self.value_options, + error_limit=self.error_limit) - # Map the right column names for the join: + + if self.verbose: + print("Mapping the column names for the join.") joined_column_names: typing.List[str] right_column_names: typing.List[str] (joined_column_names, right_column_names) = self.merge_columns(left_kr, right_kr) @@ -209,6 +228,8 @@ def process(self): print("mapped right columns: %s" % " ".join(right_column_names)) print(" joined columns: %s" % " ".join(joined_column_names)) + if self.verbose: + print("Opening the output edge file: %s" % str(self.output_path)) ew: KgtkWriter = KgtkWriter.open(joined_column_names, self.output_path, require_all_columns=False, @@ -218,21 +239,52 @@ def process(self): verbose=self.verbose, very_verbose=self.very_verbose) + output_data_lines: int = 0 + left_data_lines_read: int = 0 + left_data_lines_kept: int = 0 + right_data_lines_read: int = 0 + right_data_lines_kept: int = 0 + + if self.verbose: + print("Processing the left input file") row: typing.list[str] left_node1_idx: int = self.node1_column_idx(left_kr, who="left") for row in left_kr: - left_key: str = self.build_join_key(left_kr, left_node1_idx, row) - if left_key in 
joined_key_set: + left_data_lines_read += 1 + if joined_key_set is None: ew.write(row) + output_data_lines += 1 + left_data_lines_kept += 1 + else: + left_key: str = self.build_join_key(left_kr, left_node1_idx, row) + if left_key in joined_key_set: + ew.write(row) + output_data_lines += 1 + left_data_lines_kept += 1 + if self.verbose: + print("Processing the right input file") right_shuffle_list: typing.List[int] = ew.build_shuffle_list(right_column_names) right_node1_idx: int = self.node1_column_idx(right_kr, who="right") for row in right_kr: - right_key: str = self.build_join_key(right_kr, right_node1_idx, row) - if right_key in joined_key_set: + right_data_lines_read += 1 + if joined_key_set is None: ew.write(row, shuffle_list=right_shuffle_list) + output_data_lines += 1 + right_data_lines_kept += 1 + else: + right_key: str = self.build_join_key(right_kr, right_node1_idx, row) + if right_key in joined_key_set: + ew.write(row, shuffle_list=right_shuffle_list) + output_data_lines += 1 + right_data_lines_kept += 1 ew.close() + if self.verbose: + print("The join is complete") + print("%d left input data lines read, %d kept" % (left_data_lines_read, left_data_lines_kept)) + print("%d right input data lines read, %d kept" % (right_data_lines_read, right_data_lines_kept)) + print("%d data lines written." % output_data_lines) def main(): """ @@ -241,6 +293,9 @@ def main(): parser = ArgumentParser() parser.add_argument(dest="left_file_path", help="The left KGTK file to join", type=Path) parser.add_argument(dest="right_file_path", help="The right KGTK file to join", type=Path) + parser.add_argument( "--error-limit", dest="error_limit", + help="The maximum number of errors to report before failing", type=int, default=EdgeReader.ERROR_LIMIT_DEFAULT) + parser.add_argument( "--field-separator", dest="field_separator", help="Separator for multifield keys", default=EdgeJoiner.FIELD_SEPARATOR_DEFAULT) parser.add_argument( "--fill-short-lines", dest="fill_short_lines", help="Fill missing trailing columns in short lines with empty values.", action='store_true') @@ -288,6 +343,7 @@ def main(): truncate_long_lines=args.truncate_long_lines, value_options=value_options, gzip_in_parallel=args.gzip_in_parallel, + error_limit=args.error_limit, verbose=args.verbose, very_verbose=args.very_verbose) From aede6c1eb396078bcdb08253bfe878013e495c86 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 6 May 2020 17:45:08 -0700 Subject: [PATCH 097/278] Flush progress reports to ensure a timely appearance. Provide key file names. 
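
flush=True matters because stdout is block buffered when redirected, so
progress messages can otherwise lag far behind the work they describe. A
minimal illustration (not from this patch):

    import time

    print("starting a slow step...", flush=True)  # visible immediately, even when piped to a file
    time.sleep(60)                                # the slow work runs here
    print("slow step done", flush=True)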
--- kgtk/join/edgejoiner.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/kgtk/join/edgejoiner.py b/kgtk/join/edgejoiner.py index 2a7044283..fcee3ee94 100644 --- a/kgtk/join/edgejoiner.py +++ b/kgtk/join/edgejoiner.py @@ -97,7 +97,7 @@ def single_column_key_set(self, kr: EdgeReader, join_idx: int)->typing.Set[str]: def extract_join_key_set(self, file_path: Path, who: str)->typing.Set[str]: if self.verbose: - print("Extracting the %s join key set" % who) + print("Extracting the %s join key set from %s" % (who, str(file_path)), flush=True) kr: EdgeReader = EdgeReader.open_edge_file(file_path, short_line_action=self.short_line_action, long_line_action=self.long_line_action, @@ -134,21 +134,21 @@ def join_key_sets(self)->typing.Optional[typing.Set[str]]: """ if self.left_join and self.right_join: if self.verbose: - print("Outer join, no need to compute join keys.") + print("Outer join, no need to compute join keys.", flush=True) return None elif self.left_join and not self.right_join: if self.verbose: - print("Computing the left join key set") + print("Computing the left join key set", flush=True) return self.extract_join_key_set(self.left_file_path, "left").copy() elif self.right_join and not self.left_join: if self.verbose: - print("Computing the right join key set") + print("Computing the right join key set", flush=True) return self.extract_join_key_set(self.right_file_path, "right").copy() else: if self.verbose: - print("Computing the inner join key set") + print("Computing the inner join key set", flush=True) left_join_key_set: typing.Set[str] = self.extract_join_key_set(self.left_file_path, "left") right_join_key_set: typing.Set[str] = self.extract_join_key_set(self.right_file_path, "right") return left_join_key_set.intersection(right_join_key_set) @@ -194,7 +194,7 @@ def process(self): joined_key_set: typing.Optional[typing.Set[str]] = self.join_key_sets() if self.verbose: - print("Opening the left edge file: %s" % str(self.left_file_path)) + print("Opening the left edge file: %s" % str(self.left_file_path), flush=True) # Open the input files for the second time. This won't work with stdin. 
left_kr: EdgeReader = EdgeReader.open_edge_file(self.left_file_path, short_line_action=self.short_line_action, @@ -206,7 +206,7 @@ def process(self): if self.verbose: - print("Opening the right edge file: %s" % str(self.right_file_path)) + print("Opening the right edge file: %s" % str(self.right_file_path), flush=True) right_kr: EdgeReader = EdgeReader.open_edge_file(self.right_file_path, short_line_action=self.short_line_action, long_line_action=self.long_line_action, @@ -217,19 +217,19 @@ def process(self): if self.verbose: - print("Mapping the column names for the join.") + print("Mapping the column names for the join.", flush=True) joined_column_names: typing.List[str] right_column_names: typing.List[str] (joined_column_names, right_column_names) = self.merge_columns(left_kr, right_kr) if self.verbose: - print(" left columns: %s" % " ".join(left_kr.column_names)) - print(" right columns: %s" % " ".join(right_kr.column_names)) - print("mapped right columns: %s" % " ".join(right_column_names)) - print(" joined columns: %s" % " ".join(joined_column_names)) + print(" left columns: %s" % " ".join(left_kr.column_names), flush=True) + print(" right columns: %s" % " ".join(right_kr.column_names), flush=True) + print("mapped right columns: %s" % " ".join(right_column_names), flush=True) + print(" joined columns: %s" % " ".join(joined_column_names), flush=True) if self.verbose: - print("Opening the output edge file: %s" % str(self.output_path)) + print("Opening the output edge file: %s" % str(self.output_path), flush=True) ew: KgtkWriter = KgtkWriter.open(joined_column_names, self.output_path, require_all_columns=False, @@ -246,7 +246,7 @@ def process(self): right_data_lines_kept: int = 0 if self.verbose: - print("Processing the left input file") + print("Processing the left input file: %s" % str(self.left_file_path), flush=True) row: typing.list[str] left_node1_idx: int = self.node1_column_idx(left_kr, who="left") for row in left_kr: @@ -263,7 +263,7 @@ def process(self): left_data_lines_kept += 1 if self.verbose: - print("Processing the right input file") + print("Processing the right input file: %s" % str(self.right_file_path), flush=True) right_shuffle_list: typing.List[int] = ew.build_shuffle_list(right_column_names) right_node1_idx: int = self.node1_column_idx(right_kr, who="right") for row in right_kr: @@ -281,10 +281,10 @@ def process(self): ew.close() if self.verbose: - print("The join is complete") - print("%d left input data lines read, %d kept" % (left_data_lines_read, left_data_lines_kept)) - print("%d right input data lines read, %d kept" % (right_data_lines_read, right_data_lines_kept)) - print("%d data lines written." % output_data_lines) + print("The join is complete", flush=True) + print("%d left input data lines read, %d kept" % (left_data_lines_read, left_data_lines_kept), flush=True) + print("%d right input data lines read, %d kept" % (right_data_lines_read, right_data_lines_kept), flush=True) + print("%d data lines written." % output_data_lines, flush=True) def main(): """ From c2381984f046f8d813943705e737b3137e1e37a4 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 6 May 2020 18:06:06 -0700 Subject: [PATCH 098/278] Give more feedback on the join columns. Flush the output file when done with the left input file. 
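
The new KgtkWriter.flush() deliberately skips the flush when a gzip thread
owns the output stream. A sketch of the guard pattern (hypothetical Writer
class, not the real KgtkWriter):

    class Writer:
        def __init__(self, file_out, gzip_thread=None):
            self.file_out = file_out
            self.gzip_thread = gzip_thread  # background compressor when gzip is in use

        def flush(self):
            # The gzip thread owns the stream's buffering; flushing here could
            # interleave with the compressor, so flush only when writing directly.
            if self.gzip_thread is None:
                self.file_out.flush()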
--- kgtk/join/edgejoiner.py | 10 +++++++++- kgtk/join/kgtkwriter.py | 4 ++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/kgtk/join/edgejoiner.py b/kgtk/join/edgejoiner.py index fcee3ee94..2b5e35555 100644 --- a/kgtk/join/edgejoiner.py +++ b/kgtk/join/edgejoiner.py @@ -97,7 +97,7 @@ def single_column_key_set(self, kr: EdgeReader, join_idx: int)->typing.Set[str]: def extract_join_key_set(self, file_path: Path, who: str)->typing.Set[str]: if self.verbose: - print("Extracting the %s join key set from %s" % (who, str(file_path)), flush=True) + print("Extracting the join key set from the %s input file: %s" % (who, str(file_path)), flush=True) kr: EdgeReader = EdgeReader.open_edge_file(file_path, short_line_action=self.short_line_action, long_line_action=self.long_line_action, @@ -113,15 +113,21 @@ def extract_join_key_set(self, file_path: Path, who: str)->typing.Set[str]: raise ValueError("The %s file is not an edge file" % who) join_idx: int = self.node1_column_idx(kr, who) + if self.verbose: + print("Joining on node1 (index %s in the %s input file)" % (join_idx, who)) # join_on_label and join_on_node2 may be specified if self.join_on_label or self.join_on_node2: if self.join_on_label: if kr.label_column_idx < 0: raise ValueError("join_on_label may not be used because the %s input file does not have a label column." % who) + if self.verbose: + print("Joining on label (index %s in the %s input file)" % (kr.label_column_idx, who)) if self.join_on_node2: if kr.node2_column_idx < 0: raise ValueError("join_on_node2 may not be used because the %s input file does not have a node2 column." % who) + if self.verbose: + print("Joining on node2 (index %s in the %s input file)" % (kr.node2_column_idx, who)) return self.multi_column_key_set(kr, join_idx) # closes er file else: # This uses optimized code: @@ -261,6 +267,8 @@ def process(self): ew.write(row) output_data_lines += 1 left_data_lines_kept += 1 + # Flush the output file so far: + ew.flush() if self.verbose: print("Processing the right input file: %s" % str(self.right_file_path), flush=True) diff --git a/kgtk/join/kgtkwriter.py b/kgtk/join/kgtkwriter.py index 2aac2f7e1..367b6f88f 100644 --- a/kgtk/join/kgtkwriter.py +++ b/kgtk/join/kgtkwriter.py @@ -289,6 +289,10 @@ def write(self, values: typing.List[str], sys.stdout.write(".") sys.stdout.flush() + def flush(self): + if self.gzip_thread is None: + self.file_out.flush() + def close(self): if self.gzip_thread is not None: self.gzip_thread.close() From 41378a836020592932588c205b867c53ddf79d7c Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 6 May 2020 18:10:49 -0700 Subject: [PATCH 099/278] Give feedback on the number of join keys. --- kgtk/join/edgejoiner.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/kgtk/join/edgejoiner.py b/kgtk/join/edgejoiner.py index 2b5e35555..b70101422 100644 --- a/kgtk/join/edgejoiner.py +++ b/kgtk/join/edgejoiner.py @@ -138,6 +138,7 @@ def join_key_sets(self)->typing.Optional[typing.Set[str]]: """ Read the input edge files the first time, building the sets of left and right join values. 
""" + join_key_set: typing.Set[str] if self.left_join and self.right_join: if self.verbose: print("Outer join, no need to compute join keys.", flush=True) @@ -145,19 +146,32 @@ def join_key_sets(self)->typing.Optional[typing.Set[str]]: elif self.left_join and not self.right_join: if self.verbose: print("Computing the left join key set", flush=True) - return self.extract_join_key_set(self.left_file_path, "left").copy() + join_key_set = self.extract_join_key_set(self.left_file_path, "left").copy() + if self.verbose: + print("There are %d keys in the left join key set." % len(join_key_set)) + return join_key_set elif self.right_join and not self.left_join: if self.verbose: print("Computing the right join key set", flush=True) - return self.extract_join_key_set(self.right_file_path, "right").copy() + join_key_set = self.extract_join_key_set(self.right_file_path, "right").copy() + if self.verbose: + print("There are %d keys in the right join key set." % len(join_key_set)) + return join_key_set else: if self.verbose: print("Computing the inner join key set", flush=True) left_join_key_set: typing.Set[str] = self.extract_join_key_set(self.left_file_path, "left") + if self.verbose: + print("There are %d keys in the left file key set." % len(left_join_key_set)) right_join_key_set: typing.Set[str] = self.extract_join_key_set(self.right_file_path, "right") - return left_join_key_set.intersection(right_join_key_set) + if self.verbose: + print("There are %d keys in the right file key set." % len(right_join_key_set)) + join_key_set = left_join_key_set.intersection(right_join_key_set) + if self.verbose: + print("There are %d keys in the inner join key set." % len(join_key_set)) + return joiin_key_set def merge_columns(self, left_kr: EdgeReader, right_kr: EdgeReader)->typing.Tuple[typing.List[str], typing.List[str]]: joined_column_names: typing.List[str] = [ ] From 5dfb02a3df62493be336a4ff1d203a342324cbd0 Mon Sep 17 00:00:00 2001 From: ckxz105 Date: Wed, 6 May 2020 19:00:59 -0700 Subject: [PATCH 100/278] vector embedding: add support for dimension value set, pca, property-value setting. Please refer to readme update for details. 
--- kgtk/cli/text_embedding.py | 104 ++++++----- kgtk/cli/text_embedding_README.md | 42 +++-- kgtk/gt/embedding_utils.py | 296 +++++++++++++++++------------- 3 files changed, 255 insertions(+), 187 deletions(-) diff --git a/kgtk/cli/text_embedding.py b/kgtk/cli/text_embedding.py index 6083da388..ed73266dd 100644 --- a/kgtk/cli/text_embedding.py +++ b/kgtk/cli/text_embedding.py @@ -130,36 +130,36 @@ def main(**kwargs): import re import argparse import pickle + from collections import defaultdict from kgtk.gt.embedding_utils import EmbeddingVector # get input parameters from kwargs output_uri = kwargs.get("output_uri", "") parallel_count = kwargs.get("parallel_count", "1") - black_list_files = kwargs.get("black_list_files", "") + black_list_files = kwargs.get("black_list_files", []) all_models_names = kwargs.get("all_models_names", ['bert-base-wikipedia-sections-mean-tokens']) input_format = kwargs.get("input_format", "kgtk_format") input_uris = kwargs.get("input_uris", []) output_format = kwargs.get("output_format", "kgtk_format") property_labels_files = kwargs.get("property_labels_file_uri", []) query_server = kwargs.get("query_server") - properties = dict() - all_property_relate_inputs = [kwargs.get("label_properties", ["label"]), - kwargs.get("description_properties", ["description"]), - kwargs.get("isa_properties", ["P31"]), - kwargs.get("has_properties", ["all"]), - ] - all_required_properties = ["label_properties", "description_properties", - "isa_properties", "has_properties"] - cache_config = {"use_cache": kwargs.get("use_cache", False), - "host": kwargs.get("cache_host", "dsbox01.isi.edu"), - "port": kwargs.get("cache_port", 6379) - } - for each_property, each_input in zip(all_required_properties, all_property_relate_inputs): - for each in each_input: - properties[each] = each_property + + cache_config = { + "use_cache": kwargs.get("use_cache", False), + "host": kwargs.get("cache_host", "dsbox01.isi.edu"), + "port": kwargs.get("cache_port", 6379) + } + + sentence_properties = { + "label_properties": kwargs.get("label_properties", ["label"]), + "description_properties": kwargs.get("description_properties", ["description"]), + "isa_properties": kwargs.get("isa_properties", ["P31"]), + "has_properties": kwargs.get("has_properties", ["all"]), + "property_values": kwargs.get("property_values", []) + } output_properties = { - "metatada_properties": kwargs.get("metatada_properties", []), + "metadata_properties": kwargs.get("metadata_properties", []), "output_properties": kwargs.get("output_properties", "text_embedding") } @@ -173,8 +173,8 @@ def main(**kwargs): raise KGTKException("No input file path given!") if output_uri == "": - output_uri = os.getenv("HOME") # os.getcwd() - if black_list_files != "": + output_uri = os.getenv("HOME") + if black_list_files: black_list_set = load_black_list_files(black_list_files) else: black_list_set = set() @@ -184,7 +184,8 @@ def main(**kwargs): else: property_labels_dict = {} - run_TSNE = kwargs.get("run_TSNE", True) + dimensional_reduction = kwargs.get("dimensional_reduction", "none") + dimension_val = kwargs.get("dimension_val", 2) for each_model_name in all_models_names: for each_input_file in input_uris: @@ -192,13 +193,14 @@ def main(**kwargs): process = EmbeddingVector(each_model_name, query_server=query_server, cache_config=cache_config, parallel_count=parallel_count) process.read_input(file_path=each_input_file, skip_nodes_set=black_list_set, - input_format=input_format, target_properties=properties, + input_format=input_format, 
                               target_properties=sentence_properties,
                               property_labels_dict=property_labels_dict)
             process.get_vectors()
             process.plot_result(output_properties=output_properties,
                                 input_format=input_format, output_uri=output_uri,
-                                run_TSNE=run_TSNE, output_format=output_format)
-            process.evaluate_result()
+                                dimensional_reduction=dimensional_reduction, dimension_val=dimension_val,
+                                output_format=output_format)
+            # process.evaluate_result()
             _logger.info("*" * 20 + "finished" + "*" * 20)
         except Exception as e:
             _logger.debug(e, exc_info=True)
@@ -212,17 +214,7 @@ def parser():
 
 def add_arguments(parser):
-    import argparse
-    def str2bool(v):
-        if isinstance(v, bool):
-            return v
-        if v.lower() in ('yes', 'true', 't', 'y', '1'):
-            return True
-        elif v.lower() in ('no', 'false', 'f', 'n', '0'):
-            return False
-        else:
-            raise argparse.ArgumentTypeError('Boolean value expected.')
-
+    from kgtk.gt.embedding_utils import str2bool
     parser.accept_shared_argument('_debug')
     # logging level, no longer need as there is a global --debug choice for it
     # parser.add_argument('-l', '--logging-level', action='store', dest='logging_level',
@@ -243,27 +235,32 @@ def str2bool(v):
                         help="the input file format, could either be `test_format` or `kgtk_format`, default is `kgtk_format`", )
     parser.add_argument('-p', '--property-labels-file', action='store', nargs='+', dest='property_labels_file_uri',
                         help="the path to the property labels file.", )
+
     # properties (only valid for kgtk format input/output data)
     parser.add_argument('--label-properties', action='store', nargs='+',
                         dest='label_properties', default=["label"],
-                        help="""The names of the eges for label properties, Default is ["label"]. \n
+                        help="""The names of the edges for label properties, Default is ["label"]. \n
                         This argument is only valid for input in kgtk format.""")
     parser.add_argument('--description-properties', action='store', nargs='+',
                         dest='description_properties', default=["description"],
-                        help="""The names of the eges for description properties, Default is ["description"].\n
+                        help="""The names of the edges for description properties, Default is ["description"].\n
                         This argument is only valid for input in kgtk format.""")
     parser.add_argument('--isa-properties', action='store', nargs='+',
                         dest='isa_properties', default=["P31"],
-                        help="""The names of the eges for `isa` properties, Default is ["P31"] (the `instance of` node in
-                        wikidata).\n This argument is only valid for input in kgtk format.""")
+                        help="""The names of the edges for `isa` properties, Default is ["P31"] (the `instance of` node in
+                        wikidata).""")
     parser.add_argument('--has-properties', action='store', nargs='+',
                         dest='has_properties', default=["all"],
-                        help="""The names of the eges for `has` properties, Default is ["all"] (will automatically append all
-                        properties found for each node).\n This argument is only valid for input in kgtk format.""")
+                        help="""The names of the edges for `has` properties, Default is ["all"] (will automatically append all
+                        properties found for each node).""")
+    parser.add_argument('--property-value', action='store', nargs='+',
+                        dest='property_values', default=[],
+                        help="""For those edges found in `has` properties, the nodes specified here will be displayed with the
+                        corresponding edge (property) values instead of the edge name. """)
     parser.add_argument('--output-property', action='store',
                         dest='output_properties', default="text_embedding",
-                        help="""The output property name used to record the embedding. Default is `output_properties`. \nThis
-                        argument is only valid for output in kgtk format.""")
+                        help="""The output property name used to record the embedding. Default is `text_embedding`. \n
+                        This argument is only valid for output in kgtk format.""")
     # output
     parser.add_argument('-o', '--embedding-projector-metadata-path', action='store', dest='output_uri', default="",
                         help="output path for the metadata file, default will be current user's home directory")
@@ -272,18 +269,27 @@ def str2bool(v):
                         help="output format, can either be `tsv_format` or `kgtk_format`. \nIf choosing `tsv_format`, the output "
                              "will be a tsv file, with each row containing only the vector representation of a node. Each "
                              "dimension is separated by a tab")
-    parser.add_argument('--embedding-projector-metatada', action='store', nargs='+',
-                        dest='metatada_properties', default=[],
+    parser.add_argument('--embedding-projector-metadata', action='store', nargs='+',
+                        dest='metadata_properties', default=[],
                         help="""list of properties used to construct a metadata file for use in the Google Embedding Projector:
                         http://projector.tensorflow.org. \n Default: the label and description of each node.""")
+
     # black list file
     parser.add_argument('-b', '--black-list', nargs='+', action='store', dest='black_list_files',
-                        default="",
+                        default=[],
                         help="the black list file, contains the Q nodes which should not be considered as candidates.")
-    # run tsne or not
-    parser.add_argument("--run-TSNE", type=str2bool, nargs='?', action='store',
-                        default=True, dest="run_TSNE",
-                        help="whether to run TSNE or not after the embedding, default is true.")
+
+    # dimensional reduction relate
+    parser.add_argument("--dimensional-reduction", nargs='?', action='store',
+                        default="none", dest="dimensional_reduction", choices=("pca", "tsne", "none"),
+                        help='whether to run a dimensional reduction algorithm or not after the embedding, default is none (not '
+                             'run). '
+                        )
+    parser.add_argument("--dimension", type=int, nargs='?', action='store',
+                        default=2, dest="dimension_val",
+                        help='How many dimensions should remain after reduction; only valid when dimensional '
+                             'reduction is enabled, default value is 2 '
+                        )
     parser.add_argument("--parallel", nargs='?', action='store',
                         default="1", dest="parallel_count",

diff --git a/kgtk/cli/text_embedding_README.md b/kgtk/cli/text_embedding_README.md
index a05bf3ba7..17753bd8c 100644
--- a/kgtk/cli/text_embedding_README.md
+++ b/kgtk/cli/text_embedding_README.md
@@ -22,22 +22,22 @@ kgtk text_embedding \
     --embedding-projector-path/ -o # optional, default is the home directory of current user
     --black-list/ -b # optional, default is None
     --logging-level/ -l \ # optional, default is `info`
-    --run-TSNE False # optional, default is True
+    --dimensional-reduction pca \ # optional, default is none
+    --dimension 5 \ # optional, default is 2
     --parallel 4 # optional, default is 1
```
##### Example 1:
For easiest running, just give the input file as `kgtk text_embedding -i input_file.csv`

##### Example 2:
-Running with more specific parameters and not run TSNE (output original embedding vectors):
+Running with more specific parameters and running TSNE afterwards to reduce the output dimension:
```
-kgtk text_embedding \
+kgtk text_embedding --debug \
    --input test_edges_file.tsv \
    --model bert-base-wikipedia-sections-mean-tokens bert-base-nli-cls-token \
    --label-properties P1449 P1559 \
    --description-properties P94 \
-   --logging-level debug \
-   --run-TSNE false
+   --dimensional-reduction tsne
```
##### Example 3:
Running with test format input and tsv output (for visualization at google embedding projector)
@@ -116,20 +116,40 @@ an ordered list of properties. When a property contains multiple values, the fir
 If not given, the program will try to use the default edge(property) name as `P279`. Those words in properties will be for vector embedding later.

##### --has-properties
an ordered list of properties. The output consists of a comma-separated text with the labels of the properties, using "and" for the last item, e.g., “country, place of birth, religion and canonization status”. If not given, the program will use all of the properties found for the node. Those words in properties will be for vector embedding later.

+##### --property-value
+If an edge listed in `has-properties` should be expanded into its values, specify the edge name here and the system will go further to get the property values of this node instead of using the name of the edge. Default is empty `[]`.
+For example: for wikidata node `Q41421` (Michael Jordan) and `P54` (member of sports team), if specified here, the generated sentence will be "Michael Jordan has Chicago Bulls" instead of "Michael Jordan has member of sports team".
+
##### --out-properties
the property used to record the embedding. If not given, the program will use the edge(property) name as `text_embedding`.
This option is only available when output format is set to `kgtk_format`.

+##### --property-labels-file
+This parameter only works for KGTK format input. In some conditions, a KGTK value is just a reference to another P node; in that case, the user needs to specify a separate label file for KGTK to read.
+For example, if run without the labels file on the wikidata dump file, we will get generated sentences like:
+`WALS genus code is a Q19847637, Q20824104, and has P1466 and P1468` (sentence generated for P1467). After adding the labels file, we will get the correct sentence: `WALS genus code is a Wikidata property for an identifier, Wikidata property for items about languages, and has WALS family code and WALS lect code`.
+This property labels file should also be a KGTK format file. One example file is [here](https://drive.google.com/open?id=1F7pb4LEx5MT1YTqycUCQcs8H2OWmBbB6 "here") (access available only to KGTK developers).
+
+
+#### Dimensional Reduction Algorithm
+
+##### --dimensional-reduction
+Users can choose whether to run a dimensional reduction algorithm to reduce the output vector dimensions. The default is not to run one.
+Currently 3 choices can be made:
+- `none`: do not run a dimensional reduction algorithm
+- `tsne`: run the TSNE algorithm; note that TSNE only works for certain dimension values
+- `pca`: run the PCA algorithm
+
+##### --dimension
+If a dimensional reduction algorithm is specified, the user can use this option to specify how many dimensions to keep for the final vector output.
+
### Output
There will be 2 kinds of files:

-##### --run-TSNE
-User can choose to whether run TSNE to reduce the dimension of the output vectors after getting the embeding vectors or not. The default is True.
-
##### Logger file
-User can set up the different logging level to records different infomation. Default is `warning` level. Available options are: `debug / info / warning / error / none`. If set to `none`, no logging file will generate.
+If the global parameter `--debug` is passed, an extra debugging logger file will be stored in the user's home directory.

##### Metadata File
User can specify where to store the metadata file for the vectors. If not given, the default is to save the metadata file at the user's home directory. If set to `none`, no metadata file will generate.
@@ -152,7 +172,7 @@ Third column is the embeded vecotrs.
You can also set up the parallel count to some number larger than 1 to run in multiprocess mode. Currently only supported for kgtk format input data. For example: `--parallel 4`

##### Reduced Embedding Vectors
This will have the embedded vector values after the dimensional reduction algorithm has reduced the dimensions for each Q node. This is used for visualization. (for example, you can view it at Google's online tools here: http://projector.tensorflow.org/)

3. Metadata for the generated vectors:
This will contain the metadata information for the Q nodes generated from the 2 files mentioned above. It will contain the Q node value of each vector, the type (it is a `candidate` or a `ground truth` node), the given label of the Q node and corresponding fetched description information from wikidata.
#### Query / cache related diff --git a/kgtk/gt/embedding_utils.py b/kgtk/gt/embedding_utils.py index f4fb45f5f..1e57491f2 100644 --- a/kgtk/gt/embedding_utils.py +++ b/kgtk/gt/embedding_utils.py @@ -9,13 +9,13 @@ import pickle import os import time +import argparse from pyrallel import ParallelProcessor -from sklearn.manifold import TSNE # type: ignore +from collections import defaultdict, OrderedDict from tqdm import tqdm # type: ignore from ast import literal_eval from sentence_transformers import SentenceTransformer, SentencesDataset, LoggingHandler, losses, models # type: ignore -from collections import defaultdict from SPARQLWrapper import SPARQLWrapper, JSON, POST, URLENCODED # type: ignore from kgtk.exceptions import KGTKException @@ -23,7 +23,7 @@ class EmbeddingVector: def __init__(self, model_name=None, query_server=None, cache_config: dict = None, parallel_count=1): self._logger = logging.getLogger(__name__) - if model_name is None: + if not model_name: self.model_name = 'bert-base-nli-mean-tokens' # xlnet need to be trained before using, we can't use this for now # elif model_name == "xlnet-base-cased": @@ -50,22 +50,20 @@ def __init__(self, model_name=None, query_server=None, cache_config: dict = None try: _ = self.redis_server.get("foo") self._logger.debug("Cache server {}:{} connected!".format(host, port)) - except: + except Exception as e: self._logger.error("Cache server {}:{} is not able to be connected! Will not use cache!".format(host, port)) + self._logger.debug(e, exc_info=True) self.redis_server = None else: self.redis_server = None self._parallel_count = int(parallel_count) self._logger.debug("Running with {} processes.".format(parallel_count)) - self.qnodes_descriptions = dict() self.vectors_map = dict() - self.property_labels_dict = dict() - self.q_node_to_label = dict() - self.node_labels = dict() + self.node_labels = dict() # this is used to store {node:label} pairs + self.candidates = defaultdict(dict) # this is used to store all node {node:dict()} information self.vectors_2D = None self.vector_dump_file = None self.gt_nodes = set() - self.candidates = defaultdict(dict) self.metadata = [] self.gt_indexes = set() self.input_format = "" @@ -115,20 +113,30 @@ def send_sparql_query(self, query_body: str): raise KGTKException(error_message) def _get_labels(self, nodes: typing.List[str]): - query_nodes = " ".join(["wd:{}".format(each) for each in nodes]) - query = """ - select ?item ?nodeLabel - where { - values ?item {""" + query_nodes + """} - ?item rdfs:label ?nodeLabel. - FILTER(LANG(?nodeLabel) = "en"). - } - """ - results2 = self.send_sparql_query(query) - for each_res in results2: - node_id = each_res['item']['value'].split("/")[-1] - value = each_res['nodeLabel']['value'] - self.node_labels[node_id] = value + nodes_need_query = set() + for each in nodes: + if each not in self.node_labels: + nodes_need_query.add(each) + if nodes_need_query: + query_nodes = " ".join(["wd:{}".format(each) for each in nodes_need_query]) + query = """ + select ?item ?nodeLabel + where { + values ?item {""" + query_nodes + """} + ?item rdfs:label ?nodeLabel. + FILTER(LANG(?nodeLabel) = "en"). 
+ } + """ + results2 = self.send_sparql_query(query) + for each_res in results2: + node_id = each_res['item']['value'].split("/")[-1] + nodes_need_query.remove(node_id) + value = each_res['nodeLabel']['value'] + self.node_labels[node_id] = value + + # for those nodes we can't find label, just add this to dict to prevent query again + for each_node in nodes_need_query: + self.node_labels[each_node] = each_node def _get_labels_and_descriptions(self, query_qnodes: str, need_find_label: bool, need_find_description: bool): query_body = """ @@ -154,83 +162,89 @@ def _get_labels_and_descriptions(self, query_qnodes: str, need_find_label: bool, if need_find_description: self.candidates[each_node]["description_properties"] = [description] - def _get_property_values(self, query_qnodes, query_part_names, query_part_properties): + def _get_property_values(self, query_qnodes: str, properties: dict, properties_reversed: dict): + """ + run sparql query to get corresponding property values of given q nodes + """ used_p_node_ids = set() - for part_name, part in zip(query_part_names, query_part_properties): + all_needed_properties = "" + for part_name, part in properties.items(): if part_name == "isa_properties": self._get_labels(part) - for i, each in enumerate(part): - if each not in {"label", "description", "all"}: - query_body2 = """ - select ?item ?eachPropertyLabel - where {{ - values ?item {{{all_nodes}}} - ?item wdt:{qnode} ?eachProperty. - SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }} - }} - """.format(all_nodes=query_qnodes, qnode=each) - results2 = self.send_sparql_query(query_body2) - - for each_res in results2: - node_id = each_res['item']['value'].split("/")[-1] - value = each_res['eachPropertyLabel']['value'] - if part_name == "isa_properties" and self.node_labels[each].endswith("of"): - value = self.node_labels[each] + "||" + value - used_p_node_ids.add(node_id) - if part_name in self.candidates[node_id]: - self.candidates[node_id][part_name].add(value) - else: - self.candidates[node_id][part_name] = {value} + + for each_node, role in properties_reversed.items(): + if role != {"has_properties"} and each_node not in {"label", "description", "all"}: + all_needed_properties += "wdt:{} ".format(each_node) + + query_body = """ + select ?item ?properties ?eachPropertyValueLabel + where {{ + values ?item {{{all_nodes}}} + values ?properties {{{properties}}} + ?item ?properties ?eachPropertyValue. + SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". 
}} + }} + """.format(all_nodes=query_qnodes, properties=all_needed_properties) + results = self.send_sparql_query(query_body) + + for each_res in results: + node_id = each_res['item']['value'].split("/")[-1] + node_property = each_res['properties']['value'].split("/")[-1] + roles = properties_reversed[node_property] + value = each_res['eachPropertyValueLabel']['value'] + if node_property in properties["isa_properties"] and self.node_labels[node_property].endswith("of"): + value = self.node_labels[node_property] + "||" + value + used_p_node_ids.add(node_property) + for each_role in roles: + if each_role != "property_values": + if each_role in self.candidates[node_id]: + self.candidates[node_id][each_role].add(value) + else: + self.candidates[node_id][each_role] = {value} return used_p_node_ids - def _get_all_properties(self, query_qnodes, used_p_node_ids, properties_list): - has_properties_set = set(properties_list[3]) + def _get_all_properties(self, query_qnodes: str, used_p_node_ids: set, properties: dict): + """ + run sparql query to get all properties of given q nodes + """ + has_properties_set = set(properties["has_properties"]) query_body3 = """ - select DISTINCT ?item ?p_entity ?p_entityLabel - where { - values ?item {""" + query_qnodes + """} - ?item ?p ?o. - FILTER regex(str(?p), "^http://www.wikidata.org/prop/P", "i") - BIND (IRI(REPLACE(STR(?p), "http://www.wikidata.org/prop", "http://www.wikidata.org/entity")) AS ?p_entity) . - SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } - } - """ + select DISTINCT ?item ?p_entity ?p_entityLabel + where { + values ?item {""" + query_qnodes + """} + ?item ?p ?o. + FILTER regex(str(?p), "^http://www.wikidata.org/prop/P", "i") + BIND (IRI(REPLACE(STR(?p), "http://www.wikidata.org/prop", "http://www.wikidata.org/entity")) AS ?p_entity) . + SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". 
} + } + """ results3 = self.send_sparql_query(query_body3) for each in results3: node_name = each['item']['value'].split("/")[-1] p_node_id = each['p_entity']['value'].split("/")[-1] p_node_label = each['p_entityLabel']['value'] if p_node_id not in used_p_node_ids: - if properties_list[3] == ["all"] or p_node_id in has_properties_set: + if has_properties_set == {"all"} or p_node_id in has_properties_set: if "has_properties" in self.candidates[node_name]: self.candidates[node_name]["has_properties"].add(p_node_label) else: self.candidates[node_name]["has_properties"] = {p_node_label} - def get_item_description(self, qnodes: typing.List[str] = None, target_properties: dict = {}): + def get_item_description(self, target_properties: dict, properties_reversed: dict, + qnodes: typing.Union[set, typing.List[str]]): """ use sparql query to get the descriptions of given Q nodes """ - if qnodes is None: - qnodes = self.candidates - if "all" in target_properties: - find_all_properties = True - else: - find_all_properties = False - properties_list = [[] for _ in range(4)] - names = ["labels", "descriptions", "isa_properties", "has_properties"] - for k, v in target_properties.items(): - if v == "label_properties": - properties_list[0].append(k) - elif v == "description_properties": - properties_list[1].append(k) - elif v == "isa_properties": - properties_list[2].append(k) - elif v == "has_properties": - properties_list[3].append(k) + # find_all_properties = False + if "all" in properties_reversed: + # find_all_properties = True + _ = properties_reversed.pop("all") + self._logger.info("Need to find all properties.") hash_generator = hashlib.md5() - hash_generator.update(str(properties_list).encode('utf-8')) + # sort to ensure the hash key same + target_properties = OrderedDict(sorted(target_properties.items())) + hash_generator.update(str(target_properties).encode('utf-8')) properties_list_hash = "||" + str(hash_generator.hexdigest()) sentences_cache_dict = {} @@ -240,7 +254,7 @@ def get_item_description(self, qnodes: typing.List[str] = None, target_propertie cache_res = self.redis_server.get(cache_key) self._logger.debug("Cached key is: {}".format(cache_key)) if cache_res is not None: - self._logger.debug("Cache hitted {}".format(cache_key)) + self._logger.debug("Cache hit {}".format(cache_key)) sentences_cache_dict[each_node] = cache_res.decode("utf-8") self._logger.debug("Cached for those nodes {} / {}".format(len(sentences_cache_dict), len(qnodes))) @@ -254,31 +268,22 @@ def get_item_description(self, qnodes: typing.List[str] = None, target_propertie # only need to do query when we still have remained nodes if len(qnodes) > 0: - need_find_label = "label" in properties_list[0] - need_find_description = "description" in properties_list[1] query_qnodes = "" for each in qnodes: query_qnodes += "wd:{} ".format(each) + need_find_label = "label" in target_properties["label_properties"] + need_find_description = "description" in target_properties["description_properties"] # this is used to get corresponding labels / descriptions if need_find_label or need_find_description: self._get_labels_and_descriptions(query_qnodes, need_find_label, need_find_description) - if len(properties_list[3]) > len(qnodes): - # in this condition, we have too many properties need to be queried, it will waste time - # query to get all properties then filtering would save more times - find_all_properties = True - query_part2_names = names[:3] - query_part2_properties = properties_list[:3] - else: - query_part2_names = names - 
query_part2_properties = properties_list # this is used to get corresponding labels of properties values - used_p_node_ids = self._get_property_values(query_qnodes, query_part2_names, query_part2_properties) + used_p_node_ids = self._get_property_values(query_qnodes, target_properties, properties_reversed) # if need get all properties, we need to run extra query - if find_all_properties: - self._get_all_properties(query_qnodes, used_p_node_ids, properties_list) + # if find_all_properties: + self._get_all_properties(query_qnodes, used_p_node_ids, target_properties) for each_node_id in qnodes: each_sentence = self.attribute_to_sentence(self.candidates[each_node_id], each_node_id) @@ -294,14 +299,14 @@ def get_item_description(self, qnodes: typing.List[str] = None, target_propertie def _process_one(self, args): """ - one process for multiprocess calling - :param args: - :return: + one process for multiprocess calling, should not be used for any other function + :param args: args to receive from main process + :return: corresponding node vector and attribute """ node_id = args["node_id"] each_node_attributes = args["attribute"] - concated_sentence = self.attribute_to_sentence(each_node_attributes, node_id) - vectors = self.get_sentences_embedding([concated_sentence], [node_id])[0] + concat_sentence = self.attribute_to_sentence(each_node_attributes, node_id) + vectors = self.get_sentences_embedding([concat_sentence], [node_id])[0] return {"v_" + node_id: vectors, "c_" + node_id: each_node_attributes} def _multiprocess_collector(self, data): @@ -313,14 +318,19 @@ def _multiprocess_collector(self, data): k = k.replace("c_", "") self.candidates[k] = v - def read_input(self, file_path: str, skip_nodes_set: set = None, - input_format: str = "kgtk_format", target_properties: dict = {}, - property_labels_dict: dict = {}, black_list_set: set = set() + def read_input(self, file_path: str, target_properties: dict, property_labels_dict: dict, + skip_nodes_set: set = None, input_format: str = "kgtk_format", + black_list_set: typing.Optional[set] = None ): """ load the input candidates files """ - self.property_labels_dict = property_labels_dict + self.node_labels.update(property_labels_dict) + # reverse sentence property to be {property : role) + properties_reversed = defaultdict(set) + for k, v in target_properties.items(): + for each_property in v: + properties_reversed[each_property].add(k) if input_format == "test_format": self.input_format = input_format @@ -335,6 +345,7 @@ def read_input(self, file_path: str, skip_nodes_set: set = None, raise KGTKException("Can't find ground truth id column! 
It should either named as `GT_kg_id` or `kg_id`") for _, each in input_df.iterrows(): + temp = [] if isinstance(each["candidates"], str): temp = str(each['candidates']).split("|") elif each['candidates'] is np.nan or math.isnan(each['candidates']): @@ -355,20 +366,20 @@ def read_input(self, file_path: str, skip_nodes_set: set = None, temp.extend(gt_nodes) for each_q in temp: - self.q_node_to_label[each_q] = label + self.node_labels[each_q] = label if skip_nodes_set is not None and each_q in skip_nodes_set: to_remove_q.add(each_q) temp = set(temp) - to_remove_q count += len(temp) self.gt_nodes.add(each[gt_column_id]) - self.get_item_description(temp, target_properties) + self.get_item_description(target_properties, properties_reversed, temp) self._logger.info("Totally {} rows with {} candidates loaded.".format(str(len(gt)), str(count))) elif input_format == "kgtk_format": # assume the input edge file is sorted - if "all" in target_properties: - _ = target_properties.pop("all") + if "all" in properties_reversed: + _ = properties_reversed.pop("all") add_all_properties = True else: add_all_properties = False @@ -406,11 +417,14 @@ def read_input(self, file_path: str, skip_nodes_set: set = None, for each_line in f: each_line = each_line.replace("\n", "").split("\t") node_id = each_line[column_references["node"]] + # skip nodes id in black list + if black_list_set and node_id in black_list_set: + continue + node_property = each_line[column_references["property"]] node_value = each_line[column_references["value"]] # remove @ mark if "@" in node_value and node_value[0] != "@": - node_value_org = node_value node_value = node_value[:node_value.index("@")] # remove extra double quote " and single quote ' @@ -423,7 +437,7 @@ def read_input(self, file_path: str, skip_nodes_set: set = None, if current_process_node_id is None: current_process_node_id = node_id else: - # if we get to next id, concate all properties into one sentence to represent the Q node + # if we get to next id, concat all properties into one sentence to represent the Q node # for multi process if self._parallel_count > 1: @@ -431,35 +445,40 @@ def read_input(self, file_path: str, skip_nodes_set: set = None, pp.add_task(each_arg) # for single process else: - concated_sentence = self.attribute_to_sentence(each_node_attributes, current_process_node_id) - each_node_attributes["sentence"] = concated_sentence + concat_sentence = self.attribute_to_sentence(each_node_attributes, current_process_node_id) + each_node_attributes["sentence"] = concat_sentence self.candidates[current_process_node_id] = each_node_attributes - # after write down finish, we can cleaer and start parsing next one + # after write down finish, we can clear and start parsing next one each_node_attributes = {"has_properties": [], "isa_properties": [], "label_properties": [], "description_properties": []} # update to new id current_process_node_id = node_id - if node_property in target_properties: - each_node_attributes[target_properties[node_property]].append(node_value) + if node_property in properties_reversed: + roles = properties_reversed[node_property] + if "property_values" in roles: + node_value = self.get_real_label_name(node_value) + for each_role in roles: + if each_role != "property_values": + each_node_attributes[each_role].append(node_value) if add_all_properties and each_line[column_references["value"]][0] == "P": - each_node_attributes["has_properties"].append(node_value) + each_node_attributes["has_properties"].append(self.get_real_label_name(node_value)) # 
close multiprocess pool if self._parallel_count > 1: pp.task_done() pp.join() else: - raise KGTKException("Unkonwn input format {}".format(input_format)) + raise KGTKException("Unknown input format {}".format(input_format)) self._logger.info("Totally {} Q nodes loaded.".format(len(self.candidates))) self.vector_dump_file = "dump_vectors_{}_{}.pkl".format(file_path[:file_path.rfind(".")], self.model_name) # self._logger.debug("The cache file name will be {}".format(self.vector_dump_file)) def get_real_label_name(self, node): - if node in self.property_labels_dict: - return self.property_labels_dict[node] + if node in self.node_labels: + return self.node_labels[node] else: return node @@ -489,6 +508,7 @@ def attribute_to_sentence(self, attribute_dict: dict, node_id=None): concated_sentence += " is a " elif concated_sentence == "": concated_sentence += "It is a " + # remove last ", " concated_sentence += temp[:-2] if "has_properties" in attribute_dict and len(attribute_dict["has_properties"]) > 0: temp = [self.get_real_label_name(each) for each in attribute_dict["has_properties"]] @@ -580,9 +600,9 @@ def print_vector(self, vectors, output_properties: str = "text_embedding", outpu else: print(str(each_dimension) + "\n", end="") - def plot_result(self, output_properties={}, input_format="kgtk_format", + def plot_result(self, output_properties: dict, input_format="kgtk_format", output_uri: str = "", output_format="kgtk_format", - run_TSNE=True + dimensional_reduction="none", dimension_val=2 ): """ transfer the vectors to lower dimension so that we can plot @@ -590,12 +610,23 @@ def plot_result(self, output_properties={}, input_format="kgtk_format", """ self.vectors_map = {k: v for k, v in sorted(self.vectors_map.items(), key=lambda item: item[0], reverse=True)} vectors = list(self.vectors_map.values()) - # use TSNE to reduce dimension - if run_TSNE: - self._logger.warning("Start running TSNE to reduce dimension. It will take a long time.") + # reduce dimension if needed + if dimensional_reduction.lower() == "tsne": + self._logger.warning("Start running TSNE to reduce dimension. It will take some time.") + start = time.time() + from sklearn.manifold import TSNE # type: ignore + self.vectors_2D = TSNE(n_components=int(dimension_val), random_state=0).fit_transform(vectors) + self._logger.info("Totally used {} seconds.".format(time.time() - start)) + elif dimensional_reduction.lower() == "pca": + self._logger.warning("Start running PCA to reduce dimension. 
It will take some time.") start = time.time() - self.vectors_2D = TSNE(n_components=2, random_state=0).fit_transform(vectors) + from sklearn.decomposition import PCA # type: ignore + self.vectors_2D = PCA(n_components=int(dimension_val)).fit_transform(vectors) self._logger.info("Totally used {} seconds.".format(time.time() - start)) + elif dimensional_reduction.lower() == "none": + self._logger.info("Not run dimensional reduction algorithm.") + else: + raise KGTKException("Unknown or unsupport dimensional reduction type: {}".format(dimensional_reduction)) if input_format == "test_format": gt_indexes = set() @@ -605,7 +636,7 @@ def plot_result(self, output_properties={}, input_format="kgtk_format", self.metadata.append("Q_nodes\tType\tLabel\tDescription") for i, each in enumerate(self.vectors_map.keys()): - label = self.q_node_to_label[each] + label = self.node_labels[each] description = self.candidates[each]["sentence"] if i in gt_indexes: self.metadata.append("{}\tground_truth_node\t{}\t{}".format(each, label, description)) @@ -614,7 +645,7 @@ def plot_result(self, output_properties={}, input_format="kgtk_format", self.gt_indexes = gt_indexes elif input_format == "kgtk_format": - if len(output_properties.get("metatada_properties", [])) == 0: + if len(output_properties.get("metadata_properties", [])) == 0: for k, v in self.candidates.items(): label = v.get("label_properties", "") if len(label) > 0 and isinstance(label, list): @@ -624,7 +655,7 @@ def plot_result(self, output_properties={}, input_format="kgtk_format", description = description[0] self.metadata.append("{}\t\t{}\t{}".format(k, label, description)) else: - required_properties = output_properties["metatada_properties"] + required_properties = output_properties["metadata_properties"] self.metadata.append("node\t" + "\t".join(required_properties)) for k, v in self.candidates.items(): each_metadata = k + "\t" @@ -633,7 +664,7 @@ def plot_result(self, output_properties={}, input_format="kgtk_format", self.metadata.append(each_metadata) metadata_output_path = os.path.join(output_uri, self.vector_dump_file.split("/")[-1]) - if run_TSNE: + if self.vectors_2D is not None: self.print_vector(self.vectors_2D, output_properties.get("output_properties"), output_format) else: self.print_vector(vectors, output_properties.get("output_properties"), output_format) @@ -674,3 +705,14 @@ def calculate_distance(a, b): dist += (v1 - v2) ** 2 dist = dist ** 0.5 return dist + + +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + raise argparse.ArgumentTypeError('Boolean value expected.') From fd176e9e71923f1608ccc114c1a597cefbd00ff8 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 6 May 2020 19:01:24 -0700 Subject: [PATCH 101/278] Add arbitrary join column names. 
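In outline, this patch replaces the fixed node1 join key with a key built from an arbitrary list of column indexes; a minimal sketch with illustrative names (not the exact patch code):
```
import typing

FIELD_SEPARATOR = "|"  # stands in for KgtkFormat.LIST_SEPARATOR

def build_join_key(row: typing.List[str], join_idx_list: typing.List[int]) -> str:
    # Concatenate the selected columns into a single hashable key.
    return FIELD_SEPARATOR.join(row[idx] for idx in join_idx_list)

# e.g. joining on node1 and label (column indexes 0 and 1):
assert build_join_key(["Q42", "P31", "Q5"], [0, 1]) == "Q42|P31"
```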
--- kgtk/join/edgejoiner.py | 120 +++++++++++++++++++++++++++------------- 1 file changed, 83 insertions(+), 37 deletions(-) diff --git a/kgtk/join/edgejoiner.py b/kgtk/join/edgejoiner.py index b70101422..e9924eb83 100644 --- a/kgtk/join/edgejoiner.py +++ b/kgtk/join/edgejoiner.py @@ -38,6 +38,10 @@ class EdgeJoiner(KgtkFormat): join_on_label: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) join_on_node2: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) + # TODO: Write fuill validators + left_join_columns: typing.Optional[typing.List[str]] = attr.ib(default=None) + right_join_columns: typing.Optional[typing.List[str]] = attr.ib(default=None) + # The prefix applied to right file column names in the output file: prefix: typing.Optional[str] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(str)), default=None) @@ -72,19 +76,24 @@ def node1_column_idx(self, kr: EdgeReader, who: str)->int: raise ValueError("EdgeJoiner: unknown node1 column index in KGTK %s edge type." % who) return idx - def build_join_key(self, kr: EdgeReader, join_idx: int, row: typing.List[str])->str: - key: str = row[join_idx] - if self.join_on_label: - key += self.field_separator+ row[kr.label_column_idx] - if self.join_on_node2: - key += self.field_separator+ row[kr.node2_column_idx] + def build_join_key(self, kr: EdgeReader, join_idx_list: typing.List[int], row: typing.List[str])->str: + key: str = "" + join_idx: int + first: bool = True + for join_idx in join_idx_list: + if first: + first = False + else: + key += self.field_separator + + key += row[join_idx] return key - def multi_column_key_set(self, kr: EdgeReader, join_idx: int)->typing.Set[str]: + def multi_column_key_set(self, kr: EdgeReader, join_idx_list: typing.List[int])->typing.Set[str]: result: typing.Set[str] = set() row: typing.List[str] for row in kr: - result.add(self.build_join_key(kr, join_idx, row)) + result.add(self.build_join_key(kr, join_idx_list, row)) return result # Optimized for a single join column: @@ -95,26 +104,25 @@ def single_column_key_set(self, kr: EdgeReader, join_idx: int)->typing.Set[str]: result.add(row[join_idx]) return result - def extract_join_key_set(self, file_path: Path, who: str)->typing.Set[str]: - if self.verbose: - print("Extracting the join key set from the %s input file: %s" % (who, str(file_path)), flush=True) - kr: EdgeReader = EdgeReader.open_edge_file(file_path, - short_line_action=self.short_line_action, - long_line_action=self.long_line_action, - fill_short_lines=self.fill_short_lines, - truncate_long_lines=self.truncate_long_lines, - value_options = self.value_options, - gzip_in_parallel=self.gzip_in_parallel, - error_limit=self.error_limit, - verbose=self.verbose, - very_verbose=self.very_verbose) + def build_join_idx_list(self, kr: EdgeReader, who: str, join_columns: typing.Optional[typing.List[str]])->typing.List[int]: + join_idx: int + join_idx_list: typing.List[int] = [ ] + col_num: int = 1 + if join_columns is not None and len(join_columns) > 0: + join_column:str + for join_column in join_columns: + if join_column not in kr.column_name_map: + raise ValueError("Join column %s not found in in the %s input file" % (join_column, who)) + join_idx = kr.column_name_map[join_column] + if self.verbose: + print("Join column %d: %s (index %d in the %s input file)" % (col_num, join_column, join_idx, who)) + join_idx_list.append(join_idx) + return join_idx_list - if not kr.is_edge_file: - raise ValueError("The %s file is not an edge file" % who) 
- - join_idx: int = self.node1_column_idx(kr, who) + join_idx = self.node1_column_idx(kr, who) if self.verbose: print("Joining on node1 (index %s in the %s input file)" % (join_idx, who)) + join_idx_list.append(join_idx) # join_on_label and join_on_node2 may be specified if self.join_on_label or self.join_on_node2: @@ -123,15 +131,42 @@ def extract_join_key_set(self, file_path: Path, who: str)->typing.Set[str]: raise ValueError("join_on_label may not be used because the %s input file does not have a label column." % who) if self.verbose: print("Joining on label (index %s in the %s input file)" % (kr.label_column_idx, who)) + join_idx_list.append(kr.label_column_idx) + if self.join_on_node2: if kr.node2_column_idx < 0: raise ValueError("join_on_node2 may not be used because the %s input file does not have a node2 column." % who) if self.verbose: print("Joining on node2 (index %s in the %s input file)" % (kr.node2_column_idx, who)) - return self.multi_column_key_set(kr, join_idx) # closes er file - else: + join_idx_list.append(kr.node2_column_idx) + return join_idx_list + + + def extract_join_key_set(self, file_path: Path, who: str, join_columns: typing.Optional[typing.List[str]])->typing.Set[str]: + if self.verbose: + print("Extracting the join key set from the %s input file: %s" % (who, str(file_path)), flush=True) + if join_columns is not None: + print("Using join columns: %s" % " ".join(join_columns)) + kr: EdgeReader = EdgeReader.open_edge_file(file_path, + short_line_action=self.short_line_action, + long_line_action=self.long_line_action, + fill_short_lines=self.fill_short_lines, + truncate_long_lines=self.truncate_long_lines, + value_options = self.value_options, + gzip_in_parallel=self.gzip_in_parallel, + error_limit=self.error_limit, + verbose=self.verbose, + very_verbose=self.very_verbose) + + if not kr.is_edge_file: + raise ValueError("The %s file is not an edge file" % who) + + join_idx_list: typing.List[int] = self.build_join_idx_list(kr, who, join_columns) + if len(join_idx_list) == 1: # This uses optimized code: - return self.single_column_key_set(kr, join_idx) # closes er file + return self.single_column_key_set(kr, join_idx_list[0]) # closes er file + else: + return self.multi_column_key_set(kr, join_idx_list) # closes er file def join_key_sets(self)->typing.Optional[typing.Set[str]]: @@ -146,7 +181,7 @@ def join_key_sets(self)->typing.Optional[typing.Set[str]]: elif self.left_join and not self.right_join: if self.verbose: print("Computing the left join key set", flush=True) - join_key_set = self.extract_join_key_set(self.left_file_path, "left").copy() + join_key_set = self.extract_join_key_set(self.left_file_path, "left", self.left_join_columns).copy() if self.verbose: print("There are %d keys in the left join key set." % len(join_key_set)) return join_key_set @@ -154,7 +189,7 @@ def join_key_sets(self)->typing.Optional[typing.Set[str]]: elif self.right_join and not self.left_join: if self.verbose: print("Computing the right join key set", flush=True) - join_key_set = self.extract_join_key_set(self.right_file_path, "right").copy() + join_key_set = self.extract_join_key_set(self.right_file_path, "right", self.right_join_columns).copy() if self.verbose: print("There are %d keys in the right join key set." 
% len(join_key_set)) return join_key_set @@ -162,16 +197,16 @@ def join_key_sets(self)->typing.Optional[typing.Set[str]]: else: if self.verbose: print("Computing the inner join key set", flush=True) - left_join_key_set: typing.Set[str] = self.extract_join_key_set(self.left_file_path, "left") + left_join_key_set: typing.Set[str] = self.extract_join_key_set(self.left_file_path, "left", self.left_join_columns) if self.verbose: print("There are %d keys in the left file key set." % len(left_join_key_set)) - right_join_key_set: typing.Set[str] = self.extract_join_key_set(self.right_file_path, "right") + right_join_key_set: typing.Set[str] = self.extract_join_key_set(self.right_file_path, "right", self.right_join_columns) if self.verbose: print("There are %d keys in the right file key set." % len(right_join_key_set)) join_key_set = left_join_key_set.intersection(right_join_key_set) if self.verbose: print("There are %d keys in the inner join key set." % len(join_key_set)) - return joiin_key_set + return join_key_set def merge_columns(self, left_kr: EdgeReader, right_kr: EdgeReader)->typing.Tuple[typing.List[str], typing.List[str]]: joined_column_names: typing.List[str] = [ ] @@ -236,6 +271,15 @@ def process(self): error_limit=self.error_limit) + # TODO: We ought to do this test sooner. + left_join_idx_list: typing.List[int] = self.build_join_idx_list(left_kr, "left", self.left_join_columns) + right_join_idx_list: typing.List[int] = self.build_join_idx_list(right_kr, "right", self.right_join_columns) + if len(left_join_idx_list) != len(right_join_idx_list): + print("the left join key has %d components, the right join key has %d columns. Exiting." % (len(left_join_idx_list), len(right_join_idx_list))) + left_kr.close() + right_kr.close() + return + if self.verbose: print("Mapping the column names for the join.", flush=True) joined_column_names: typing.List[str] @@ -268,7 +312,6 @@ def process(self): if self.verbose: print("Processing the left input file: %s" % str(self.left_file_path), flush=True) row: typing.list[str] - left_node1_idx: int = self.node1_column_idx(left_kr, who="left") for row in left_kr: left_data_lines_read += 1 if joined_key_set is None: @@ -276,7 +319,7 @@ def process(self): output_data_lines += 1 left_data_lines_kept += 1 else: - left_key: str = self.build_join_key(left_kr, left_node1_idx, row) + left_key: str = self.build_join_key(left_kr, left_join_idx_list, row) if left_key in joined_key_set: ew.write(row) output_data_lines += 1 @@ -287,7 +330,6 @@ def process(self): if self.verbose: print("Processing the right input file: %s" % str(self.right_file_path), flush=True) right_shuffle_list: typing.List[int] = ew.build_shuffle_list(right_column_names) - right_node1_idx: int = self.node1_column_idx(right_kr, who="right") for row in right_kr: right_data_lines_read += 1 if joined_key_set is None: @@ -295,7 +337,7 @@ def process(self): output_data_lines += 1 right_data_lines_kept += 1 else: - right_key: str = self.build_join_key(right_kr, right_node1_idx, row) + right_key: str = self.build_join_key(right_kr, right_join_idx_list, row) if right_key in joined_key_set: ew.write(row, shuffle_list=right_shuffle_list) output_data_lines += 1 @@ -325,6 +367,7 @@ def main(): parser.add_argument( "--join-on-node2", dest="join_on_node2", help="If both input files are edge files, include the node2 column in the join.", action='store_true') parser.add_argument( "--gzip-in-parallel", dest="gzip_in_parallel", help="Execute gzip in parallel.", action='store_true') parser.add_argument( "--left-join", 
dest="left_join", help="Perform a left outer join.", action='store_true') + parser.add_argument( "--left-join-columns", dest="left_join_columns", help="Left file join columns.", nargs='+') parser.add_argument( "--long-line-action", dest="long_line_action", help="The action to take when a long line is detected.", @@ -333,6 +376,7 @@ def main(): parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to read", type=Path, default=None) parser.add_argument( "--prefix", dest="prefix", help="The prefix applied to right file column names in the output file.") parser.add_argument( "--right-join", dest="right_join", help="Perform a right outer join.", action='store_true') + parser.add_argument( "--right-join-columns", dest="right_join_columns", help="Right file join columns.", nargs='+') parser.add_argument( "--short-line-action", dest="short_line_action", help="The action to take whe a short line is detected.", @@ -357,6 +401,8 @@ def main(): right_join=args.right_join, join_on_label=args.join_on_label, join_on_node2=args.join_on_node2, + left_join_columns=args.left_join_columns, + right_join_columns=args.right_join_columns, prefix=args.prefix, field_separator=args.field_separator, short_line_action=args.short_line_action, From 4b32b054144a4ba5e2fe082748bc10130f02f61d Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 6 May 2020 19:04:17 -0700 Subject: [PATCH 102/278] Rename the file join column arguments. --- kgtk/join/edgejoiner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kgtk/join/edgejoiner.py b/kgtk/join/edgejoiner.py index e9924eb83..0d08725b7 100644 --- a/kgtk/join/edgejoiner.py +++ b/kgtk/join/edgejoiner.py @@ -366,8 +366,8 @@ def main(): parser.add_argument( "--join-on-label", dest="join_on_label", help="If both input files are edge files, include the label column in the join.", action='store_true') parser.add_argument( "--join-on-node2", dest="join_on_node2", help="If both input files are edge files, include the node2 column in the join.", action='store_true') parser.add_argument( "--gzip-in-parallel", dest="gzip_in_parallel", help="Execute gzip in parallel.", action='store_true') + parser.add_argument( "--left-file-join-columns", dest="left_join_columns", help="Left file join columns.", nargs='+') parser.add_argument( "--left-join", dest="left_join", help="Perform a left outer join.", action='store_true') - parser.add_argument( "--left-join-columns", dest="left_join_columns", help="Left file join columns.", nargs='+') parser.add_argument( "--long-line-action", dest="long_line_action", help="The action to take when a long line is detected.", @@ -375,8 +375,8 @@ def main(): parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to read", type=Path, default=None) parser.add_argument( "--prefix", dest="prefix", help="The prefix applied to right file column names in the output file.") + parser.add_argument( "--right-file-join-columns", dest="right_join_columns", help="Right file join columns.", nargs='+') parser.add_argument( "--right-join", dest="right_join", help="Perform a right outer join.", action='store_true') - parser.add_argument( "--right-join-columns", dest="right_join_columns", help="Right file join columns.", nargs='+') parser.add_argument( "--short-line-action", dest="short_line_action", help="The action to take whe a short line is detected.", From 165e42fd2d842a70953654ec57fc9a300bbbda37 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 6 May 2020 19:26:25 -0700 
Subject: [PATCH 103/278] Join node files as well as edge files.

---
 kgtk/join/edgejoiner.py | 153 +++++++++++++++++++++++-----------------
 1 file changed, 90 insertions(+), 63 deletions(-)

diff --git a/kgtk/join/edgejoiner.py b/kgtk/join/edgejoiner.py
index 0d08725b7..4d9923c8d 100644
--- a/kgtk/join/edgejoiner.py
+++ b/kgtk/join/edgejoiner.py
@@ -15,7 +15,7 @@
 import typing
 
 from kgtk.join.enumnameaction import EnumNameAction
-from kgtk.join.edgereader import EdgeReader
+from kgtk.join.kgtkreader import KgtkReader
 from kgtk.join.kgtkformat import KgtkFormat
 from kgtk.join.kgtkwriter import KgtkWriter
 from kgtk.join.kgtkvalueoptions import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS
@@ -62,21 +62,28 @@ class EdgeJoiner(KgtkFormat):
 
     gzip_in_parallel: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False)
 
-    error_limit: int = attr.ib(validator=attr.validators.instance_of(int), default=EdgeReader.ERROR_LIMIT_DEFAULT)
+    error_limit: int = attr.ib(validator=attr.validators.instance_of(int), default=KgtkReader.ERROR_LIMIT_DEFAULT)
 
     verbose: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False)
     very_verbose: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False)
 
     FIELD_SEPARATOR_DEFAULT: str = KgtkFormat.LIST_SEPARATOR
 
-    def node1_column_idx(self, kr: EdgeReader, who: str)->int:
+    def node1_column_idx(self, kr: KgtkReader, who: str)->int:
         idx: int = kr.node1_column_idx
         if idx < 0:
             # TODO: throw a better exception
-            raise ValueError("EdgeJoiner: unknown node1 column index in KGTK %s edge type." % who)
+            raise ValueError("EdgeJoiner: unknown node1 column index in KGTK %s edge file." % who)
         return idx
 
-    def build_join_key(self, kr: EdgeReader, join_idx_list: typing.List[int], row: typing.List[str])->str:
+    def id_column_idx(self, kr: KgtkReader, who: str)->int:
+        idx: int = kr.id_column_idx
+        if idx < 0:
+            # TODO: throw a better exception
+            raise ValueError("EdgeJoiner: unknown id column index in KGTK %s node file."
% who) + return idx + + def build_join_key(self, kr: KgtkReader, join_idx_list: typing.List[int], row: typing.List[str])->str: key: str = "" join_idx: int first: bool = True @@ -89,7 +96,7 @@ def build_join_key(self, kr: EdgeReader, join_idx_list: typing.List[int], row: t key += row[join_idx] return key - def multi_column_key_set(self, kr: EdgeReader, join_idx_list: typing.List[int])->typing.Set[str]: + def multi_column_key_set(self, kr: KgtkReader, join_idx_list: typing.List[int])->typing.Set[str]: result: typing.Set[str] = set() row: typing.List[str] for row in kr: @@ -97,32 +104,42 @@ def multi_column_key_set(self, kr: EdgeReader, join_idx_list: typing.List[int])- return result # Optimized for a single join column: - def single_column_key_set(self, kr: EdgeReader, join_idx: int)->typing.Set[str]: + def single_column_key_set(self, kr: KgtkReader, join_idx: int)->typing.Set[str]: result: typing.Set[str] = set() row: typing.List[str] for row in kr: result.add(row[join_idx]) return result - def build_join_idx_list(self, kr: EdgeReader, who: str, join_columns: typing.Optional[typing.List[str]])->typing.List[int]: + def build_join_idx_list(self, kr: KgtkReader, who: str, join_columns: typing.Optional[typing.List[str]])->typing.List[int]: join_idx: int join_idx_list: typing.List[int] = [ ] col_num: int = 1 if join_columns is not None and len(join_columns) > 0: + if self.verbose: + print("Using %s file join columns: %s" % (who, " ".join(join_columns)), flush=True) join_column:str for join_column in join_columns: if join_column not in kr.column_name_map: raise ValueError("Join column %s not found in in the %s input file" % (join_column, who)) join_idx = kr.column_name_map[join_column] if self.verbose: - print("Join column %d: %s (index %d in the %s input file)" % (col_num, join_column, join_idx, who)) + print("Join column %d: %s (index %d in the %s input file)" % (col_num, join_column, join_idx, who), flush=True) join_idx_list.append(join_idx) return join_idx_list - join_idx = self.node1_column_idx(kr, who) - if self.verbose: - print("Joining on node1 (index %s in the %s input file)" % (join_idx, who)) - join_idx_list.append(join_idx) + if kr.is_edge_file: + join_idx = self.node1_column_idx(kr, who) + if self.verbose: + print("Joining on node1 (index %s in the %s input file)" % (join_idx, who), flush=True) + join_idx_list.append(join_idx) + elif kr.is_node_file: + join_idx = self.id_column_idx(kr, who) + if self.verbose: + print("Joining on id (index %s in the %s input file)" % (join_idx, who), flush=True) + join_idx_list.append(join_idx) + else: + raise ValueError("Unknown file type in build_join_idx_list(...)") # join_on_label and join_on_node2 may be specified if self.join_on_label or self.join_on_node2: @@ -130,38 +147,35 @@ def build_join_idx_list(self, kr: EdgeReader, who: str, join_columns: typing.Opt if kr.label_column_idx < 0: raise ValueError("join_on_label may not be used because the %s input file does not have a label column." % who) if self.verbose: - print("Joining on label (index %s in the %s input file)" % (kr.label_column_idx, who)) + print("Joining on label (index %s in the %s input file)" % (kr.label_column_idx, who), flush=True) join_idx_list.append(kr.label_column_idx) if self.join_on_node2: if kr.node2_column_idx < 0: raise ValueError("join_on_node2 may not be used because the %s input file does not have a node2 column." 
% who) if self.verbose: - print("Joining on node2 (index %s in the %s input file)" % (kr.node2_column_idx, who)) + print("Joining on node2 (index %s in the %s input file)" % (kr.node2_column_idx, who), flush=True) join_idx_list.append(kr.node2_column_idx) return join_idx_list - def extract_join_key_set(self, file_path: Path, who: str, join_columns: typing.Optional[typing.List[str]])->typing.Set[str]: + def extract_join_key_set(self, file_path: Path, who: str, join_idx_list: typing.List[int])->typing.Set[str]: if self.verbose: print("Extracting the join key set from the %s input file: %s" % (who, str(file_path)), flush=True) - if join_columns is not None: - print("Using join columns: %s" % " ".join(join_columns)) - kr: EdgeReader = EdgeReader.open_edge_file(file_path, - short_line_action=self.short_line_action, - long_line_action=self.long_line_action, - fill_short_lines=self.fill_short_lines, - truncate_long_lines=self.truncate_long_lines, - value_options = self.value_options, - gzip_in_parallel=self.gzip_in_parallel, - error_limit=self.error_limit, - verbose=self.verbose, - very_verbose=self.very_verbose) + kr: KgtkReader = KgtkReader.open(file_path, + short_line_action=self.short_line_action, + long_line_action=self.long_line_action, + fill_short_lines=self.fill_short_lines, + truncate_long_lines=self.truncate_long_lines, + value_options = self.value_options, + gzip_in_parallel=self.gzip_in_parallel, + error_limit=self.error_limit, + verbose=self.verbose, + very_verbose=self.very_verbose) if not kr.is_edge_file: raise ValueError("The %s file is not an edge file" % who) - - join_idx_list: typing.List[int] = self.build_join_idx_list(kr, who, join_columns) + if len(join_idx_list) == 1: # This uses optimized code: return self.single_column_key_set(kr, join_idx_list[0]) # closes er file @@ -169,7 +183,7 @@ def extract_join_key_set(self, file_path: Path, who: str, join_columns: typing.O return self.multi_column_key_set(kr, join_idx_list) # closes er file - def join_key_sets(self)->typing.Optional[typing.Set[str]]: + def join_key_sets(self, left_join_idx_list: typing.List[int], right_join_idx_list: typing.List[int])->typing.Optional[typing.Set[str]]: """ Read the input edge files the first time, building the sets of left and right join values. """ @@ -181,34 +195,34 @@ def join_key_sets(self)->typing.Optional[typing.Set[str]]: elif self.left_join and not self.right_join: if self.verbose: print("Computing the left join key set", flush=True) - join_key_set = self.extract_join_key_set(self.left_file_path, "left", self.left_join_columns).copy() + join_key_set = self.extract_join_key_set(self.left_file_path, "left", left_join_idx_list).copy() if self.verbose: - print("There are %d keys in the left join key set." % len(join_key_set)) + print("There are %d keys in the left join key set." % len(join_key_set), flush=True) return join_key_set elif self.right_join and not self.left_join: if self.verbose: print("Computing the right join key set", flush=True) - join_key_set = self.extract_join_key_set(self.right_file_path, "right", self.right_join_columns).copy() + join_key_set = self.extract_join_key_set(self.right_file_path, "right", right_join_idx_list).copy() if self.verbose: - print("There are %d keys in the right join key set." % len(join_key_set)) + print("There are %d keys in the right join key set." 
% len(join_key_set), flush=True) return join_key_set else: if self.verbose: print("Computing the inner join key set", flush=True) - left_join_key_set: typing.Set[str] = self.extract_join_key_set(self.left_file_path, "left", self.left_join_columns) + left_join_key_set: typing.Set[str] = self.extract_join_key_set(self.left_file_path, "left", left_join_idx_list) if self.verbose: - print("There are %d keys in the left file key set." % len(left_join_key_set)) - right_join_key_set: typing.Set[str] = self.extract_join_key_set(self.right_file_path, "right", self.right_join_columns) + print("There are %d keys in the left file key set." % len(left_join_key_set), flush=True) + right_join_key_set: typing.Set[str] = self.extract_join_key_set(self.right_file_path, "right", right_join_idx_list) if self.verbose: - print("There are %d keys in the right file key set." % len(right_join_key_set)) + print("There are %d keys in the right file key set." % len(right_join_key_set), flush=True) join_key_set = left_join_key_set.intersection(right_join_key_set) if self.verbose: - print("There are %d keys in the inner join key set." % len(join_key_set)) + print("There are %d keys in the inner join key set." % len(join_key_set), flush=True) return join_key_set - def merge_columns(self, left_kr: EdgeReader, right_kr: EdgeReader)->typing.Tuple[typing.List[str], typing.List[str]]: + def merge_columns(self, left_kr: KgtkReader, right_kr: KgtkReader)->typing.Tuple[typing.List[str], typing.List[str]]: joined_column_names: typing.List[str] = [ ] right_column_names: typing.List[str] = [ ] @@ -246,40 +260,48 @@ def merge_columns(self, left_kr: EdgeReader, right_kr: EdgeReader)->typing.Tuple return (joined_column_names, right_column_names) def process(self): - joined_key_set: typing.Optional[typing.Set[str]] = self.join_key_sets() - if self.verbose: print("Opening the left edge file: %s" % str(self.left_file_path), flush=True) - # Open the input files for the second time. This won't work with stdin. 
- left_kr: EdgeReader = EdgeReader.open_edge_file(self.left_file_path, - short_line_action=self.short_line_action, - long_line_action=self.long_line_action, - fill_short_lines=self.fill_short_lines, - truncate_long_lines=self.truncate_long_lines, - value_options = self.value_options, - error_limit=self.error_limit) + left_kr: KgtkReader = KgtkReader.open(self.left_file_path, + short_line_action=self.short_line_action, + long_line_action=self.long_line_action, + fill_short_lines=self.fill_short_lines, + truncate_long_lines=self.truncate_long_lines, + value_options = self.value_options, + error_limit=self.error_limit) if self.verbose: print("Opening the right edge file: %s" % str(self.right_file_path), flush=True) - right_kr: EdgeReader = EdgeReader.open_edge_file(self.right_file_path, - short_line_action=self.short_line_action, - long_line_action=self.long_line_action, - fill_short_lines=self.fill_short_lines, - truncate_long_lines=self.truncate_long_lines, - value_options = self.value_options, - error_limit=self.error_limit) - + right_kr: KgtkReader = KgtkReader.open(self.right_file_path, + short_line_action=self.short_line_action, + long_line_action=self.long_line_action, + fill_short_lines=self.fill_short_lines, + truncate_long_lines=self.truncate_long_lines, + value_options = self.value_options, + error_limit=self.error_limit) + + if left_kr.is_edge_file and right_kr.is_edge_file: + if self.verbose: + print("Both input files are edge files.", flush=True) + elif left_kr.is_node_file and right_kr.is_node_file: + if self.verbose: + print("Both input files are node files.", flush=True) + else: + print("Cannot join edge and node files.", flush=True) + return - # TODO: We ought to do this test sooner. left_join_idx_list: typing.List[int] = self.build_join_idx_list(left_kr, "left", self.left_join_columns) right_join_idx_list: typing.List[int] = self.build_join_idx_list(right_kr, "right", self.right_join_columns) if len(left_join_idx_list) != len(right_join_idx_list): - print("the left join key has %d components, the right join key has %d columns. Exiting." % (len(left_join_idx_list), len(right_join_idx_list))) + print("the left join key has %d components, the right join key has %d columns. Exiting." % (len(left_join_idx_list), len(right_join_idx_list)), flush=True) left_kr.close() right_kr.close() return + # This might open the input files for a second time. This won't work with stdin. + joined_key_set: typing.Optional[typing.Set[str]] = self.join_key_sets(left_join_idx_list, right_join_idx_list) + if self.verbose: print("Mapping the column names for the join.", flush=True) joined_column_names: typing.List[str] @@ -353,12 +375,17 @@ def process(self): def main(): """ Test the KGTK file joiner. + + Edge files can be joined to edge files. + Node files can also be joined to node files. + + TODO: Add more KgtkReader parameters, especially mode. 
""" parser = ArgumentParser() parser.add_argument(dest="left_file_path", help="The left KGTK file to join", type=Path) parser.add_argument(dest="right_file_path", help="The right KGTK file to join", type=Path) parser.add_argument( "--error-limit", dest="error_limit", - help="The maximum number of errors to report before failing", type=int, default=EdgeReader.ERROR_LIMIT_DEFAULT) + help="The maximum number of errors to report before failing", type=int, default=KgtkReader.ERROR_LIMIT_DEFAULT) parser.add_argument( "--field-separator", dest="field_separator", help="Separator for multifield keys", default=EdgeJoiner.FIELD_SEPARATOR_DEFAULT) parser.add_argument( "--fill-short-lines", dest="fill_short_lines", @@ -419,4 +446,4 @@ def main(): if __name__ == "__main__": main() - +a From f9dcf4b93ad17f7248f25476a3b7ad26bb6ddaef Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 6 May 2020 19:58:28 -0700 Subject: [PATCH 104/278] Add feedback. Add value filter controls. --- kgtk/join/edgejoiner.py | 7 +++++-- kgtk/join/ifexists.py | 43 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/kgtk/join/edgejoiner.py b/kgtk/join/edgejoiner.py index 4d9923c8d..45a508827 100644 --- a/kgtk/join/edgejoiner.py +++ b/kgtk/join/edgejoiner.py @@ -1,5 +1,7 @@ """ -Join two KTKG edge files. The output file is an edge file. +Join two KTKG edge files or two KGTK node files. The output file is an edge file or a node file. + +TODO: rename this to KgtkJoiner. Note: This implementation builds im-memory sets of all the key values in each input file. @@ -46,6 +48,7 @@ class EdgeJoiner(KgtkFormat): prefix: typing.Optional[str] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(str)), default=None) # The field separator used in multifield joins. The KGHT list character should be safe. + # TODO: USE THE COLUMN SEPARATOR !!!!! field_separator: str = attr.ib(validator=attr.validators.instance_of(str), default=KgtkFormat.LIST_SEPARATOR) # Ignore records with too many or too few fields? @@ -446,4 +449,4 @@ def main(): if __name__ == "__main__": main() -a + diff --git a/kgtk/join/ifexists.py b/kgtk/join/ifexists.py index 206cb52a9..88b772ffc 100644 --- a/kgtk/join/ifexists.py +++ b/kgtk/join/ifexists.py @@ -54,6 +54,10 @@ class IfExists(KgtkFormat): fill_short_lines: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) truncate_long_lines: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) + # TODO: find a working validator + # value_options: typing.Optional[KgtkValueOptions] = attr.ib(attr.validators.optional(attr.validators.instance_of(KgtkValueOptions)), default=None) + value_options: typing.Optional[KgtkValueOptions] = attr.ib(default=None) + gzip_in_parallel: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) verbose: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) @@ -119,23 +123,39 @@ def extract_key_set(self, kr: KgtkReader, who: str, key_columns: typing.List[int def process(self): # Open the input files once. 
+ if self.verbose: + print("Opening the left input file: %s" % self.left_file_path, flush=True) left_kr: KgtkReader = KgtkReader.open(self.left_file_path, short_line_action=self.short_line_action, long_line_action=self.long_line_action, fill_short_lines=self.fill_short_lines, - truncate_long_lines=self.truncate_long_lines) + truncate_long_lines=self.truncate_long_lines, + value_options = self.value_options) + if self.verbose: + print("Opening the right input file: %s" % self.right_file_path, flush=True) right_kr: KgtkReader = KgtkReader.open(self.right_file_path, short_line_action=self.short_line_action, long_line_action=self.long_line_action, fill_short_lines=self.fill_short_lines, - truncate_long_lines=self.truncate_long_lines) + truncate_long_lines=self.truncate_long_lines, + value_options = self.value_options) left_key_columns: typing.List[int] = self.get_key_columns(self.left_keys, left_kr, right_kr, "left") right_key_columns: typing.List[int] = self.get_key_columns(self.right_keys, right_kr, left_kr, "right") + if len(left_key_columns) != len(right_key_columns): + print("There are %d left key columns but %d right key columns. Exiting." % (len(left_key_columns), len(right_key_columns)), flush=True) + return + + if self.verbose: + print("Building the input key set from %s" % self.right_file_path, flush=True) key_set: typint.Set[str] = self.extract_key_set(right_kr, "right", right_key_columns) + if self.verbose: + print("There are %d entries in the key set." % len(key_set)) + if self.verbose: + print("Opening the output file: %s" % self.output_path, flush=True) ew: KgtkWriter = KgtkWriter.open(left_kr.column_names, self.output_path, require_all_columns=False, @@ -145,12 +165,22 @@ def process(self): verbose=self.verbose, very_verbose=self.very_verbose) + if self.verbose: + print("Filtering records from %s" % self.left_file_path, flush=True) + input_line_count: int = 0 + output_line_count: int = 0; + row: typing.list[str] for row in left_kr: + input_line_count += 1 left_key: str = self.build_key(row, left_key_columns) if left_key in key_set: ew.write(line) + output_line_count += 1 ew.close() + + if self.verbose: + print("Read %d records, write %d records." % (input_line_count, output_line_count)) def main(): """ @@ -162,6 +192,9 @@ def main(): parser.add_argument(dest="right_file_path", help="The right KGTK file to join", type=Path) + parser.add_argument( "--error-limit", dest="error_limit", + help="The maximum number of errors to report before failing", type=int, default=KgtkReader.ERROR_LIMIT_DEFAULT) + parser.add_argument( "--field-separator", dest="field_separator", help="Separator for multifield keys", default=IfExists.FIELD_SEPARATOR_DEFAULT) parser.add_argument( "--fill-short-lines", dest="fill_short_lines", @@ -190,8 +223,13 @@ def main(): parser.add_argument( "--very-verbose", dest="very_verbose", help="Print additional progress messages.", action='store_true') + KgtkValueOptions.add_arguments(parser) + args = parser.parse_args() + # Build the value parsing option structure. 
+ value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) + ie: IfExists = IfExists(left_file_path=args.left_file_path, right_file_path=args.right_file_path, output_path=args.output_file_path, @@ -202,6 +240,7 @@ def main(): long_line_action=args.long_line_action, fill_short_lines=args.fill_short_lines, truncate_long_lines=args.truncate_long_lines, + value_options=value_options, gzip_in_parallel=args.gzip_in_parallel, verbose=args.verbose, very_verbose=args.very_verbose) From ed54b635e4503bdebe26ff07f0a01eb853eac1a9 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 6 May 2020 20:02:58 -0700 Subject: [PATCH 105/278] Add missing import. Correct output name. --- kgtk/join/edgejoiner.py | 2 +- kgtk/join/ifexists.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/kgtk/join/edgejoiner.py b/kgtk/join/edgejoiner.py index 45a508827..b2484a547 100644 --- a/kgtk/join/edgejoiner.py +++ b/kgtk/join/edgejoiner.py @@ -20,7 +20,7 @@ from kgtk.join.kgtkreader import KgtkReader from kgtk.join.kgtkformat import KgtkFormat from kgtk.join.kgtkwriter import KgtkWriter -from kgtk.join.kgtkvalueoptions import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS +from kgtk.join.kgtkvalueoptions import KgtkValueOptions from kgtk.join.validationaction import ValidationAction @attr.s(slots=True, frozen=True) diff --git a/kgtk/join/ifexists.py b/kgtk/join/ifexists.py index 88b772ffc..90cf2d1c9 100644 --- a/kgtk/join/ifexists.py +++ b/kgtk/join/ifexists.py @@ -28,6 +28,7 @@ from kgtk.join.kgtkformat import KgtkFormat from kgtk.join.kgtkreader import KgtkReader from kgtk.join.kgtkwriter import KgtkWriter +from kgtk.join.kgtkvalueoptions import KgtkValueOptions from kgtk.join.validationaction import ValidationAction @attr.s(slots=True, frozen=True) @@ -175,7 +176,7 @@ def process(self): input_line_count += 1 left_key: str = self.build_key(row, left_key_columns) if left_key in key_set: - ew.write(line) + ew.write(row) output_line_count += 1 ew.close() From 9cddcc3bac8b23a19f98e8ade258f1c90f885633 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 6 May 2020 20:07:55 -0700 Subject: [PATCH 106/278] Pass the error_limit through. 
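The pattern, sketched with illustrative names rather than the actual KGTK classes: the limit becomes a validated attrs attribute with a default and is forwarded to each reader that gets opened:
```
import attr

ERROR_LIMIT_DEFAULT = 1000  # stand-in for KgtkReader.ERROR_LIMIT_DEFAULT

@attr.s(slots=True, frozen=True)
class FilterSketch:
    error_limit: int = attr.ib(validator=attr.validators.instance_of(int),
                               default=ERROR_LIMIT_DEFAULT)

    def open_reader(self, open_fn, path):
        # Forward the limit, as the patch does for each KgtkReader.open call.
        return open_fn(path, error_limit=self.error_limit)
```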
--- kgtk/join/ifexists.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/kgtk/join/ifexists.py b/kgtk/join/ifexists.py index 90cf2d1c9..c5ce50257 100644 --- a/kgtk/join/ifexists.py +++ b/kgtk/join/ifexists.py @@ -61,6 +61,8 @@ class IfExists(KgtkFormat): gzip_in_parallel: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) + error_limit: int = attr.ib(validator=attr.validators.instance_of(int), default=KgtkReader.ERROR_LIMIT_DEFAULT) + verbose: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) very_verbose: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) @@ -131,7 +133,11 @@ def process(self): long_line_action=self.long_line_action, fill_short_lines=self.fill_short_lines, truncate_long_lines=self.truncate_long_lines, - value_options = self.value_options) + value_options = self.value_options, + error_limit=self.error_limit, + verbose=self.verbose, + very_verbose=self.very_verbose, + ) if self.verbose: print("Opening the right input file: %s" % self.right_file_path, flush=True) @@ -140,7 +146,11 @@ def process(self): long_line_action=self.long_line_action, fill_short_lines=self.fill_short_lines, truncate_long_lines=self.truncate_long_lines, - value_options = self.value_options) + value_options = self.value_options, + error_limit=self.error_limit, + verbose=self.verbose, + very_verbose=self.very_verbose, + ) left_key_columns: typing.List[int] = self.get_key_columns(self.left_keys, left_kr, right_kr, "left") right_key_columns: typing.List[int] = self.get_key_columns(self.right_keys, right_kr, left_kr, "right") @@ -243,6 +253,7 @@ def main(): truncate_long_lines=args.truncate_long_lines, value_options=value_options, gzip_in_parallel=args.gzip_in_parallel, + error_limit=args.error_limit, verbose=args.verbose, very_verbose=args.very_verbose) From 719bbfe12eb14cab937df9a91ab771e6aee950c2 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 6 May 2020 20:20:42 -0700 Subject: [PATCH 107/278] Unify edgejoiner and nodejoiner. --- kgtk/join/{edgejoiner.py => kgtkjoiner.py} | 12 +- kgtk/join/nodejoiner.py | 245 --------------------- 2 files changed, 5 insertions(+), 252 deletions(-) rename kgtk/join/{edgejoiner.py => kgtkjoiner.py} (98%) delete mode 100644 kgtk/join/nodejoiner.py diff --git a/kgtk/join/edgejoiner.py b/kgtk/join/kgtkjoiner.py similarity index 98% rename from kgtk/join/edgejoiner.py rename to kgtk/join/kgtkjoiner.py index b2484a547..d512d7a0b 100644 --- a/kgtk/join/edgejoiner.py +++ b/kgtk/join/kgtkjoiner.py @@ -1,8 +1,6 @@ """ Join two KTKG edge files or two KGTK node files. The output file is an edge file or a node file. -TODO: rename this to KgtkJoiner. - Note: This implementation builds im-memory sets of all the key values in each input file. @@ -24,7 +22,7 @@ from kgtk.join.validationaction import ValidationAction @attr.s(slots=True, frozen=True) -class EdgeJoiner(KgtkFormat): +class KgtkJoiner(KgtkFormat): left_file_path: Path = attr.ib(validator=attr.validators.instance_of(Path)) right_file_path: Path = attr.ib(validator=attr.validators.instance_of(Path)) output_path: typing.Optional[Path] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(Path))) @@ -76,14 +74,14 @@ def node1_column_idx(self, kr: KgtkReader, who: str)->int: idx: int = kr.node1_column_idx if idx < 0: # TODO: throw a better exception - raise ValueError("EdgeJoiner: unknown node1 column index in KGTK %s edge file." 
% who) + raise ValueError("KgtkJoiner: unknown node1 column index in KGTK %s edge file." % who) return idx def id_column_idx(self, kr: KgtkReader, who: str)->int: idx: int = kr.id_column_idx if idx < 0: # TODO: throw a better exception - raise ValueError("EdgeJoiner: unknown id column index in KGTK %s node file." % who) + raise ValueError("KgtkJoiner: unknown id column index in KGTK %s node file." % who) return idx def build_join_key(self, kr: KgtkReader, join_idx_list: typing.List[int], row: typing.List[str])->str: @@ -390,7 +388,7 @@ def main(): parser.add_argument( "--error-limit", dest="error_limit", help="The maximum number of errors to report before failing", type=int, default=KgtkReader.ERROR_LIMIT_DEFAULT) - parser.add_argument( "--field-separator", dest="field_separator", help="Separator for multifield keys", default=EdgeJoiner.FIELD_SEPARATOR_DEFAULT) + parser.add_argument( "--field-separator", dest="field_separator", help="Separator for multifield keys", default=KgtkJoiner.FIELD_SEPARATOR_DEFAULT) parser.add_argument( "--fill-short-lines", dest="fill_short_lines", help="Fill missing trailing columns in short lines with empty values.", action='store_true') parser.add_argument( "--join-on-label", dest="join_on_label", help="If both input files are edge files, include the label column in the join.", action='store_true') @@ -424,7 +422,7 @@ def main(): # Build the value parsing option structure. value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) - ej: EdgeJoiner = EdgeJoiner(left_file_path=args.left_file_path, + ej: KgtkJoiner = KgtkJoiner(left_file_path=args.left_file_path, right_file_path=args.right_file_path, output_path=args.output_file_path, left_join=args.left_join, diff --git a/kgtk/join/nodejoiner.py b/kgtk/join/nodejoiner.py deleted file mode 100644 index 97ce6029d..000000000 --- a/kgtk/join/nodejoiner.py +++ /dev/null @@ -1,245 +0,0 @@ -""" -Join two KTKG edge files. The output file is an edge file. - -Note: This implementation builds im-memory sets of all the key values in -each input file. - -""" - -from argparse import ArgumentParser -import attr -import gzip -from pathlib import Path -from multiprocessing import Queue -import sys -import typing - -from kgtk.join.enumnameaction import EnumNameAction -from kgtk.join.kgtkformat import KgtkFormat -from kgtk.join.kgtkwriter import KgtkWriter -from kgtk.join.nodereader import NodeReader -from kgtk.join.validationaction import ValidationAction - -@attr.s(slots=True, frozen=True) -class NodeJoiner(KgtkFormat): - left_file_path: Path = attr.ib(validator=attr.validators.instance_of(Path)) - right_file_path: Path = attr.ib(validator=attr.validators.instance_of(Path)) - output_path: typing.Optional[Path] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(Path))) - - # left_join == False and right_join == False: inner join - # left_join == True and right_join == False: left join - # left_join == False and right_join == True: right join - # left_join = True and right_join == True: outer join - left_join: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) - right_join: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) - - # The prefix applied to right file column names in the output file: - prefix: typing.Optional[str] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(str)), default=None) - - # The field separator used in multifield joins. The KGHT list character should be safe. 
- field_separator: str = attr.ib(validator=attr.validators.instance_of(str), default=KgtkFormat.LIST_SEPARATOR) - - # Ignore records with too many or too few fields? - short_line_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.EXCLUDE) - long_line_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.EXCLUDE) - - # Require or fill trailing fields? - fill_short_lines: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) - truncate_long_lines: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) - - gzip_in_parallel: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) - - verbose: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) - very_verbose: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) - - FIELD_SEPARATOR_DEFAULT: str = KgtkFormat.LIST_SEPARATOR - - def id_column_idx(self, kr: NodeReader, who: str)->int: - idx: int = kr.id_column_idx - if idx < 0: - # TODO: throw a better exception - raise ValueError("NodeJoiner: unknown node1 column index in KGTK %s edge type." % who) - return idx - - def single_column_key_set(self, kr: NodeReader, join_idx: int)->typing.Set[str]: - result: typing.Set[str] = set() - row: typing.List[str] - for row in kr: - result.add(row[join_idx]) - return result - - def extract_join_key_set(self, file_path: Path, who: str)->typing.Set[str]: - kr: NodeReader = NodeReader.open_node_file(file_path, - short_line_action=self.short_line_action, - long_line_action=self.long_line_action, - fill_short_lines=self.fill_short_lines, - truncate_long_lines=self.truncate_long_lines, - gzip_in_parallel=self.gzip_in_parallel, - verbose=self.verbose, - very_verbose=self.very_verbose) - - if not kr.is_node_file: - raise ValueError("The %s file is not a node file" % who) - - join_idx: int = self.id_column_idx(kr, who) - return self.single_column_key_set(kr, join_idx) # closes er file - - - def join_key_sets(self)->typing.Set[str]: - """ - Read the input edge files the first time, building the sets of left and right join values. - """ - left_join_key_set: typing.Set[str] = self.extract_join_key_set(self.left_file_path, "left") - right_join_key_set: typing.Set[str] = self.extract_join_key_set(self.right_file_path, "right") - - joined_key_set: typing.Set[str] - if self.left_join and self.right_join: - # TODO: This joins everything! We can shortut computing these sets. - joined_key_set = left_join_key_set.union(right_join_key_set) - elif self.left_join and not self.right_join: - joined_key_set = left_join_key_set.copy() - elif self.right_join and not self.left_join: - joined_key_set = right_join_key_set.copy() - else: - joined_key_set = left_join_key_set.intersection(right_join_key_set) - return joined_key_set - - def merge_columns(self, left_kr: NodeReader, right_kr: NodeReader)->typing.Tuple[typing.List[str], typing.List[str]]: - joined_column_names: typing.List[str] = [ ] - right_column_names: typing.List[str] = [ ] - - # First step: copy the left column names. - column_name: str - for column_name in left_kr.column_names: - joined_column_names.append(column_name) - - idx: int = 0 - for column_name in right_kr.column_names: - if idx == right_kr.id_column_idx: - # The right file is an edge file and this is its node1 column index. - if left_kr.id_column_idx >= 0: - # The left file has a node1 column. Map to that. 
- column_name = left_kr.column_names[left_kr.id_column_idx] - else: - # Apparently we don't have a destination in the left file. Punt. - raise ValueError("Can't map right join column name to the left file #2.") - elif idx == right_kr.label_column_idx and left_kr.label_column_idx >= 0: - # Map the right file's label column to the left file's label column. - column_name = left_kr.column_names[left_kr.label_column_idx] - elif idx == right_kr.node2_column_idx and left_kr.node2_column_idx >= 0: - # Map the right file's node2 column to the left file's node2 column. - column_name = left_kr.column_names[left_kr.node2_column_idx] - else: - # Apply the prefix. - if self.prefix is not None and len(self.prefix) > 0: - column_name = self.prefix + column_name - - right_column_names.append(column_name) - if column_name not in joined_column_names: - joined_column_names.append(column_name) - idx += 1 - - return (joined_column_names, right_column_names) - - def process(self): - joined_key_set: typing.Set[str] = self.join_key_sets() - - # Open the input files for the second time. This won't work with stdin. - left_kr: NodeReader = NodeReader.open_node_file(self.left_file_path, - short_line_action=self.short_line_action, - long_line_action=self.long_line_action, - fill_short_lines=self.fill_short_lines, - truncate_long_lines=self.truncate_long_lines) - - right_kr: NodeReader = NodeReader.open_node_file(self.right_file_path, - short_line_action=self.short_line_action, - long_line_action=self.long_line_action, - fill_short_lines=self.fill_short_lines, - truncate_long_lines=self.truncate_long_lines) - - # Map the right column names for the join: - joined_column_names: typing.List[str] - right_column_names: typing.List[str] - (joined_column_names, right_column_names) = self.merge_columns(left_kr, right_kr) - - if self.verbose: - print(" left columns: %s" % " ".join(left_kr.column_names)) - print(" right columns: %s" % " ".join(right_kr.column_names)) - print("mapped right columns: %s" % " ".join(right_column_names)) - print(" joined columns: %s" % " ".join(joined_column_names)) - - ew: KgtkWriter = KgtkWriter.open(joined_column_names, - self.output_path, - require_all_columns=False, - prohibit_extra_columns=True, - fill_missing_columns=True, - gzip_in_parallel=self.gzip_in_parallel, - verbose=self.verbose, - very_verbose=self.very_verbose) - - row: typing.list[str] - left_node1_idx: int = self.id_column_idx(left_kr, who="left") - for row in left_kr: - left_key: str = row[left_node1_idx] - if left_key in joined_key_set: - ew.write(row) - - right_shuffle_list: typing.List[int] = ew.build_shuffle_list(right_column_names) - right_node1_idx: int = self.id_column_idx(right_kr, who="right") - for row in right_kr: - right_key: str = row[right_node1_idx] - if right_key in joined_key_set: - ew.write(row, shuffle_list=right_shuffle_list) - - ew.close() - -def main(): - """ - Test the KGTK file joiner. 
- """ - parser = ArgumentParser() - parser.add_argument(dest="left_file_path", help="The left KGTK file to join", type=Path) - parser.add_argument(dest="right_file_path", help="The right KGTK file to join", type=Path) - parser.add_argument( "--field-separator", dest="field_separator", help="Separator for multifield keys", default=NodeJoiner.FIELD_SEPARATOR_DEFAULT) - parser.add_argument( "--fill-short-lines", dest="fill_short_lines", - help="Fill missing trailing columns in short lines with empty values.", action='store_true') - parser.add_argument( "--gzip-in-parallel", dest="gzip_in_parallel", help="Execute gzip in parallel.", action='store_true') - parser.add_argument( "--left-join", dest="left_join", help="Perform a left outer join.", action='store_true') - - parser.add_argument( "--long-line-action", dest="long_line_action", - help="The action to take when a long line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - parser.add_argument( "--short-line-action", dest="short_line_action", - help="The action to take whe a short line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to read", type=Path, default=None) - parser.add_argument( "--prefix", dest="prefix", help="The prefix applied to right file column names in the output file.") - parser.add_argument( "--right-join", dest="right_join", help="Perform a right outer join.", action='store_true') - parser.add_argument( "--truncate-long-lines", dest="truncate_long_lines", - help="Remove excess trailing columns in long lines.", action='store_true') - parser.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true') - parser.add_argument( "--very-verbose", dest="very_verbose", help="Print additional progress messages.", action='store_true') - args = parser.parse_args() - - nj: NodeJoiner = NodeJoiner(left_file_path=args.left_file_path, - right_file_path=args.right_file_path, - output_path=args.output_file_path, - left_join=args.left_join, - right_join=args.right_join, - prefix=args.prefix, - field_separator=args.field_separator, - short_line_action=args.short_line_action, - long_line_action=args.long_line_action, - fill_short_lines=args.fill_short_lines, - truncate_long_lines=args.truncate_long_lines, - gzip_in_parallel=args.gzip_in_parallel, - verbose=args.verbose, - very_verbose=args.very_verbose) - - nj.process() - -if __name__ == "__main__": - main() - From af869671b4bf59ee6feb626e7b436da890272590 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Thu, 7 May 2020 11:44:37 -0700 Subject: [PATCH 108/278] Offer convenience iterators. --- kgtk/join/kgtkreader.py | 212 +++++++++++++++++++++++++++++++++------- 1 file changed, 178 insertions(+), 34 deletions(-) diff --git a/kgtk/join/kgtkreader.py b/kgtk/join/kgtkreader.py index 6b73c8f0e..546974d57 100644 --- a/kgtk/join/kgtkreader.py +++ b/kgtk/join/kgtkreader.py @@ -495,14 +495,9 @@ def exclude_line(self, action: ValidationAction, msg: str, line: str)->bool: raise ValueError("Too many data errors, exiting.") return result - # This is both and iterable and an iterator object. - def __iter__(self)->typing.Iterator[typing.List[str]]: - return self - # Get the next edge values as a list of strings. - # TODO: Convert integers, coordinates, etc. 
to Python types - def __next__(self)-> typing.List[str]: - values: typing.List[str] + def nextrow(self)-> typing.List[str]: + row: typing.List[str] # This loop accomodates lines that are ignored. while (True): @@ -541,42 +536,45 @@ def __next__(self)-> typing.List[str]: if self.exclude_line(self.whitespace_line_action, "saw a whitespace line", line): continue - values = line.split(self.column_separator) + row = line.split(self.column_separator) - # Optionally fill missing trailing columns with empty values: - if self.fill_short_lines and len(values) < self.column_count: - while len(values) < self.column_count: - values.append("") + # Optionally fill missing trailing columns with empty row: + if self.fill_short_lines and len(row) < self.column_count: + while len(row) < self.column_count: + row.append("") # Optionally remove extra trailing columns: - if self.truncate_long_lines and len(values) > self.column_count: - values = values[:self.column_count] + if self.truncate_long_lines and len(row) > self.column_count: + row = row[:self.column_count] # Optionally validate that the line contained the right number of columns: # # When we report line numbers in error messages, line 1 is the first line after the header line. - if self.short_line_action != ValidationAction.PASS and len(values) < self.column_count: + if self.short_line_action != ValidationAction.PASS and len(row) < self.column_count: if self.exclude_line(self.short_line_action, "Required %d columns, saw %d: '%s'" % (self.column_count, - len(values), + len(row), line), line): continue - if self.long_line_action != ValidationAction.PASS and len(values) > self.column_count: + if self.long_line_action != ValidationAction.PASS and len(row) > self.column_count: if self.exclude_line(self.long_line_action, "Required %d columns, saw %d (%d extra): '%s'" % (self.column_count, - len(values), - len(values) - self.column_count, + len(row), + len(row) - self.column_count, line), line): continue - if self._ignore_if_blank_fields(values, line): + if self._ignore_if_blank_fields(row, line): continue if self.invalid_value_action != ValidationAction.PASS: - if self._ignore_invalid_values(values, line): + # TODO: find a way to optionally cache the KgtkValue objects + # so we don't have to create them a second time in the conversion + # and iterator methods below. + if self._ignore_invalid_values(row, line): continue self.data_lines_passed += 1 @@ -584,7 +582,165 @@ def __next__(self)-> typing.List[str]: sys.stdout.write(".") sys.stdout.flush() - return values + return row + + # This is both and iterable and an iterator object. + def __iter__(self)->typing.Iterator[typing.List[str]]: + return self + + # Get the next row values as a list of strings. + # TODO: Convert integers, coordinates, etc. to Python types + def __next__(self)-> typing.List[str]: + return self.nextrow() + + def concise(self)->typing.Iterator[typing.List[typing.Optional[str]]]: + """ + Using a generator function, create an iterator that returns rows of fields + as strings. Empty fields will be returned as None. + + """ + while True: + # self.nextrow() will throw StopIteration when done. 
+ row: typing.List[str] = self.nextrow() + + # Copy the row, converting empty fields into None: + results: typing.List[typing.Optional[str]] = [ ] + field: str + for field in row: + if len(field) == 0: + results.append(None) + else: + results.append(field) + yield results + + + def to_kgtk_values(self, row: typing.List[str], validate: bool = False)->typing.List[KgtkValue]: + """ + Convert an input row into a list of KgtkValue instances. + + When validate is True, validate each KgtkValue object. + """ + options: KgtkValueOptions = self.value_options if self.value_options is not None else DEFAULT_KGTK_VALUE_OPTIONS + results: typing.List[KgtkValue] = [ ] + field: str + for field in row: + kv = KgtkValue(field, options=options) + if validate: + kv.validate() + results.append(kv) + return results + + def kgtk_values(self, validate: bool = False)->typing.Iterator[typing.List[KgtkValue]]: + """ + Using a generator function, create an iterator that returns rows of fields + as KgtkValue objects. + + When validate is True, validate each KgtkValue object. + """ + while True: + # self.nextrow() will throw StopIteration when done. + yield self.to_kgtk_values(self.nextrow(), validate=validate) + + def to_concise_kgtk_values(self, row: typing.List[str], validate: bool = False)->typing.List[typing.Optional[KgtkValue]]: + """ + Convert an input row into a list of KgtkValue instances. Empty fields will be returned as None. + + When validate is True, validate each KgtkValue object. + """ + options: KgtkValueOptions = self.value_options if self.value_options is not None else DEFAULT_KGTK_VALUE_OPTIONS + results: typing.List[typing.Optional[KgtkValue]] = [ ] + field: str + for field in row: + if len(field) == 0: + results.append(None) + else: + kv = KgtkValue(field, options=options) + if validate: + kv.validate() + results.append(kv) + return results + + def concise_kgtk_values(self, validate: bool = False)->typing.Iterator[typing.List[typing.Optional[KgtkValue]]]: + """ + Using a generator function, create an iterator that returns rows of fields + as KgtkValue objects, with empty fields returned as None. + + When validate is True, validate each KgtkValue object. + """ + while True: + # self.nextrow() will throw StopIteration when done. + yield self.to_concise_kgtk_values(self.nextrow(), validate=validate) + + def to_dict(self, row: typing.List[str], concise: bool=False)->typing.Mapping[str, str]: + """ + Convert an input row into a dict of named fields. + + If concise is True, then empty fields will be skipped. + """ + results: typing.MutableMapping[str, str] = { } + field: str + idx: int = 0 + + # We'll use two seperate loops in anticipation of a modest + # efficiency gain. + if concise: + for field in row: + if len(field) > 0: + results[self.column_names[idx]] = field + idx += 1 + else: + for field in row: + results[self.column_names[idx]] = field + idx += 1 + return results + + def dicts(self, concise: bool=False)->typing.Iterator[typing.Mapping[str, str]]: + """ + Using a generator function, create an iterator that returns each row as a dict of named fields. + + If concise is True, then empty fields will be skipped. + + """ + while True: + # self.nextrow() will throw StopIteration when done. + yield self.to_dict(self.nextrow(), concise=concise) + + def to_kgtk_value_dict(self, row: typing.List[str], validate: bool=False, concise: bool=False)->typing.Mapping[str, KgtkValue]: + """ + Convert an input row into a dict of named fields. + + If concise is True, then empty fields will be skipped. 
+ + When validate is True, validate each KgtkValue object. + """ + options: KgtkValueOptions = self.value_options if self.value_options is not None else DEFAULT_KGTK_VALUE_OPTIONS + results: typing.MutableMapping[str, KgtkValue] = { } + idx: int = 0 + field: str + for field in row: + if concise and len(field) == 0: + pass # Skip the empty field. + else: + kv = KgtkValue(field, options=options) + if validate: + kv.validate() + results[self.column_names[idx]] = kv + idx += 1 + return results + + def kgtk_value_dicts(self, validate: bool=False, concise: bool=False)->typing.Iterator[typing.Mapping[str, KgtkValue]]: + """ + Using a generator function, create an iterator that returns each row as a + dict of named KgtkValue objects. + + If concise is True, then empty fields will be skipped. + + When validate is True, validate each KgtkValue object. + """ + while True: + # self.nextrow() will throw StopIteration when done. + yield self.to_kgtk_value_dict(self.nextrow(), validate=validate, concise=concise) + def _ignore_invalid_values(self, values: typing.List[str], line: str)->bool: """Give a row of values, validate each value. If we find one or more @@ -644,18 +800,6 @@ def merge_columns(self, additional_columns: typing.List[str])->typing.List[str]: return merged_columns - def to_map(self, row: typing.List[str])->typing.Mapping[str, str]: - """ - Convert an input line into a named map of fields. - """ - result: typing.MutableMapping[str, str] = { } - value: str - idx: int = 0 - for value in row: - result[self.column_names[idx]] = value - idx += 1 - return result - @classmethod def add_shared_arguments(cls, parser: ArgumentParser): parser.add_argument(dest="kgtk_file", help="The KGTK file to read", type=Path, nargs="?") From 6604bc99a140bf98af6690cae219df147135c52c Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Thu, 7 May 2020 12:19:35 -0700 Subject: [PATCH 109/278] Must catch and StopIteration and return in a generator. Add iterator tests. --- kgtk/join/kgtkreader.py | 95 +++++++++++++++++++++++++++++++++++------ 1 file changed, 81 insertions(+), 14 deletions(-) diff --git a/kgtk/join/kgtkreader.py b/kgtk/join/kgtkreader.py index 546974d57..c1dd3bbe4 100644 --- a/kgtk/join/kgtkreader.py +++ b/kgtk/join/kgtkreader.py @@ -593,15 +593,17 @@ def __iter__(self)->typing.Iterator[typing.List[str]]: def __next__(self)-> typing.List[str]: return self.nextrow() - def concise(self)->typing.Iterator[typing.List[typing.Optional[str]]]: + def concise_rows(self)->typing.Iterator[typing.List[typing.Optional[str]]]: """ Using a generator function, create an iterator that returns rows of fields as strings. Empty fields will be returned as None. """ while True: - # self.nextrow() will throw StopIteration when done. - row: typing.List[str] = self.nextrow() + try: + row: typing.List[str] = self.nextrow() + except StopIteration: + return # Copy the row, converting empty fields into None: results: typing.List[typing.Optional[str]] = [ ] @@ -638,8 +640,10 @@ def kgtk_values(self, validate: bool = False)->typing.Iterator[typing.List[KgtkV When validate is True, validate each KgtkValue object. """ while True: - # self.nextrow() will throw StopIteration when done. 
- yield self.to_kgtk_values(self.nextrow(), validate=validate) + try: + yield self.to_kgtk_values(self.nextrow(), validate=validate) + except StopIteration: + return def to_concise_kgtk_values(self, row: typing.List[str], validate: bool = False)->typing.List[typing.Optional[KgtkValue]]: """ @@ -668,8 +672,10 @@ def concise_kgtk_values(self, validate: bool = False)->typing.Iterator[typing.Li When validate is True, validate each KgtkValue object. """ while True: - # self.nextrow() will throw StopIteration when done. - yield self.to_concise_kgtk_values(self.nextrow(), validate=validate) + try: + yield self.to_concise_kgtk_values(self.nextrow(), validate=validate) + except StopIteration: + return def to_dict(self, row: typing.List[str], concise: bool=False)->typing.Mapping[str, str]: """ @@ -702,8 +708,10 @@ def dicts(self, concise: bool=False)->typing.Iterator[typing.Mapping[str, str]]: """ while True: - # self.nextrow() will throw StopIteration when done. - yield self.to_dict(self.nextrow(), concise=concise) + try: + yield self.to_dict(self.nextrow(), concise=concise) + except StopIteration: + return def to_kgtk_value_dict(self, row: typing.List[str], validate: bool=False, concise: bool=False)->typing.Mapping[str, KgtkValue]: """ @@ -738,9 +746,10 @@ def kgtk_value_dicts(self, validate: bool=False, concise: bool=False)->typing.It When validate is True, validate each KgtkValue object. """ while True: - # self.nextrow() will throw StopIteration when done. - yield self.to_kgtk_value_dict(self.nextrow(), validate=validate, concise=concise) - + try: + yield self.to_kgtk_value_dict(self.nextrow(), validate=validate, concise=concise) + except StopIteration: + return def _ignore_invalid_values(self, values: typing.List[str], line: str)->bool: """Give a row of values, validate each value. 
If we find one or more @@ -893,6 +902,14 @@ def main(): EdgeReader.add_arguments(parser) NodeReader.add_arguments(parser) KgtkValueOptions.add_arguments(parser) + + parser.add_argument( "--test", dest="test_method", help="The test to perform", + choices=["rows", "concise-rows", + "kgtk-values", "concise-kgtk-values", + "dicts", "concise-dicts", + "kgtk-value-dicts", "concise-kgtk-value-dicts"], + default="rows") + parser.add_argument( "--test-valdate", dest="test_validate", help="Validate KgtkValue objects in test.", action='store_true') args = parser.parse_args() error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr @@ -929,8 +946,58 @@ def main(): line_count: int = 0 row: typing.List[str] - for row in kr: - line_count += 1 + kgtk_values: typing.List[KgtkValue] + concise_kgtk_values: typing.List[typing.Optional[KgtkValue]] + dict_row: typing.Mapping[str, str] + kgtk_value_dict: typing.Mapping[str, str] + if args.test_method == "rows": + if args.verbose: + print("Testing iterating over rows.", flush=True) + for row in kr: + line_count += 1 + + elif args.test_method == "concise-rows": + if args.verbose: + print("Testing iterating over concise rows.", flush=True) + for row in kr.concise_rows(): + line_count += 1 + + elif args.test_method == "kgtk-values": + if args.verbose: + print("Testing iterating over KgtkValue rows.", flush=True) + for kgtk_values in kr.kgtk_values(validate=args.test_validate): + line_count += 1 + + elif args.test_method == "concise-kgtk-values": + if args.verbose: + print("Testing iterating over concise KgtkValue rows.", flush=True) + for kgtk_values in kr.concise_kgtk_values(validate=args.test_validate): + line_count += 1 + + elif args.test_method == "dicts": + if args.verbose: + print("Testing iterating over dicts.", flush=True) + for dict_row in kr.dicts(): + line_count += 1 + + elif args.test_method == "concise-dicts": + if args.verbose: + print("Testing iterating over concise dicts.", flush=True) + for dict_row in kr.dicts(concise=True): + line_count += 1 + + elif args.test_method == "kgtk-value-dicts": + if args.verbose: + print("Testing iterating over KgtkValue dicts.", flush=True) + for kgtk_value_dict in kr.kgtk_value_dicts(validate=args.test_validate): + line_count += 1 + + elif args.test_method == "concise-kgtk-value-dicts": + if args.verbose: + print("Testing iterating over concise KgtkValue dicts.", flush=True) + for kgtk_value_dict in kr.kgtk_value_dicts(concise=True, validate=args.test_validate): + line_count += 1 + print("Read %d lines" % line_count, file=error_file, flush=True) if __name__ == "__main__": From 6bc54bb6f77f6ce8e5a953b83048752da56089bb Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Thu, 7 May 2020 12:25:16 -0700 Subject: [PATCH 110/278] Add documentation on the available iterators. --- kgtk/join/kgtkreader.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/kgtk/join/kgtkreader.py b/kgtk/join/kgtkreader.py index c1dd3bbe4..1b2c321e8 100644 --- a/kgtk/join/kgtkreader.py +++ b/kgtk/join/kgtkreader.py @@ -1,7 +1,19 @@ -""" -Read a KGTK node or edge file in TSV format. +"""Read a KGTK node or edge file in TSV format. + +Normally, results are obtained as rows of string values obtained by iteration +on the KgtkReader object. 
Alternative iterators are available to return the results +as: + + * concise_rows: lists of strings with empty fields converted to None + * kgtk_values: lists of KgtkValue objects + * concise_kgtk_values: lists of KgtkValue objects with empty fields converted to None + * dicts: dicts of strings + * dicts(concise=True): dicts of strings with empty fields omitted + * kgtk_value_dicts: dicts of KgtkValue objects + * kgtk_value_dicts(concise=True): dicts of KgtkValue objects with empty fields omitted TODO: Add support for alternative envelope formats, such as JSON. + """ from argparse import ArgumentParser From 5e9b44ebb9adef799b45756b2fc8b13408a78e80 Mon Sep 17 00:00:00 2001 From: ckxz105 Date: Thu, 7 May 2020 13:06:16 -0700 Subject: [PATCH 111/278] update property-value template --- kgtk/cli/text_embedding_README.md | 14 ++++++++------ kgtk/gt/embedding_utils.py | 18 +++++++++++++----- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/kgtk/cli/text_embedding_README.md b/kgtk/cli/text_embedding_README.md index 17753bd8c..a2b431016 100644 --- a/kgtk/cli/text_embedding_README.md +++ b/kgtk/cli/text_embedding_README.md @@ -13,7 +13,7 @@ kgtk text_embedding \ --model/ -m \ # optional, default is `bert-base-wikipedia-sections-mean-tokens` --label-properties \ # optional, default is ["label"] --description-properties \ # optional, default is ["description"] - --isa-properties \ # optional, default is ["P279"] + --isa-properties \ # optional, default is ["P31"] --has-properties \ # optional, default is ["all"] --property-labels-file/ -p \ #optional --output-format # optional, default is `kgtk_format` @@ -113,15 +113,16 @@ If not given, the program will try to use the default edge(property) name as `de ##### --isa-properties an ordered list of properties. When a property contains multiple values, the first value will selected. When a property value is not a literal, output the label of the property value. When multiple isa-properties are present, the values are output comma-separated. -If not given, the program will try to use the default edge(property) name as `P279`. Those words in properties will be for vector embedding later. +If not given, the program will try to use the default edge(property) name as `P31`. Those words in properties will be for vector embedding later. ##### --has-properties an ordered list of properties. The output consists of a comma-separated text with the labels of the properties, using and for the last item, e.g., “country, place of birth, religion and canonization status” . If not given, the program will use all of the found properties found for the node. Those words in properties will be for vector embedding later. ##### --property-value -If the properties in `has-properties` is a property which need to check for details, specify the edge name here and the system will go further to get the property values of this node instead of use the name of this edge. Default is empty `[]` -For example: For wikidata node `Q41421` (Michael Jordan) `P544` (member of sports team), if specified here, the generated sentence will be "Michael Jordan has Chicago Bulls" instead of "Michael Jordan has member of sports team". +If the properties in `has-properties` is a property which need to check for details, specify the edge name here and the system will go further to get the property values of this node instead of use the name of this edge (using template `{property} {value}`) instead of `{property}` to represent this has-property). 
Default is empty `[]`
+
+For example: For wikidata node `Q41421` (Michael Jordan) `P544` (member of sports team), if specified here, the generated sentence will be `Michael Jordan, ..., has member of sports team Chicago Bulls` instead of `Michael Jordan,..., has member of sports team`.
 
 ##### --out-properties
 the property used to record the embedding. If not given, the program will use the edge(property) name as `text_embedding`.
@@ -129,9 +130,10 @@ This option is only available when output format is set to `kgtk_format`.
 
 ##### --property-labels-file
 This parameter only works for KGTK format input. For some condition, KGTK format's value is just a reference to another P node. In this condition, user need to specify another label file for KGTK to read.
+
 For example, if run without the labels file on the wikidata dump file, we will get some generated sentence like:
-`WALS genus code is a Q19847637, Q20824104, and has P1466 and P1468` (sentence generated for P1467). After add the labels file, we will get the correct sentence as: `WALS genus code is a Wikidata property for an identifier, Wikidata property for items about languages, and has WALS family code and WALS lect code`.
-This property labels file should also be a KGTK format file. One example file is [here](https://drive.google.com/open?id=1F7pb4LEx5MT1YTqycUCQcs8H2OWmBbB6 "here") (accessed only available for KGTK developers).
+`WALS genus code is a Q19847637, Q20824104, and has P1855 and P2302` (sentence generated for P1467). After adding the labels file, we will get the correct sentence as: `WALS genus code is a Wikidata property for an identifier, Wikidata property for items about languages, and has WALS family code and WALS lect code`.
+This property labels file should also be a KGTK format file. One example file is [here](https://drive.google.com/open?id=1F7pb4LEx5MT1YTqycUCQcs8H2OWmBbB6 "here") (accessible only to KGTK developers).
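To make the expected shape concrete, here is a minimal, hypothetical labels file (the real file linked above is far larger); each row maps a P node to its human-readable label:

```
node1 label node2
P1855 label Wikidata property example@en
P2302 label property constraint@en
```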
#### Dimensional Reduction Algorithm diff --git a/kgtk/gt/embedding_utils.py b/kgtk/gt/embedding_utils.py index 1e57491f2..429d37b47 100644 --- a/kgtk/gt/embedding_utils.py +++ b/kgtk/gt/embedding_utils.py @@ -411,6 +411,9 @@ def read_input(self, file_path: str, target_properties: dict, property_labels_di current_process_node_id = None if self._parallel_count > 1: + # need to set with spawn mode to initialize with multiple cuda in multiprocess + from multiprocessing import set_start_method + set_start_method('spawn') pp = ParallelProcessor(self._parallel_count, self._process_one, collector=self._multiprocess_collector) pp.start() @@ -428,9 +431,9 @@ def read_input(self, file_path: str, target_properties: dict, property_labels_di node_value = node_value[:node_value.index("@")] # remove extra double quote " and single quote ' - if node_value[0] == '"' and node_value[-1] == '"': + while node_value[0] == '"' and node_value[-1] == '"': node_value = node_value[1:-1] - if node_value[0] == "'" and node_value[-1] == "'": + while node_value[0] == "'" and node_value[-1] == "'": node_value = node_value[1:-1] if current_process_node_id != node_id: @@ -458,12 +461,17 @@ def read_input(self, file_path: str, target_properties: dict, property_labels_di if node_property in properties_reversed: roles = properties_reversed[node_property] if "property_values" in roles: + # for property values part, changed to be "{property} {value}" + node_value = self.get_real_label_name(node_property) + " " + self.get_real_label_name(node_value) + else: node_value = self.get_real_label_name(node_value) for each_role in roles: - if each_role != "property_values": + if each_role == "property_values" and "has_properties" not in roles: + each_node_attributes["has_properties"].append(node_value) + else: each_node_attributes[each_role].append(node_value) - if add_all_properties and each_line[column_references["value"]][0] == "P": - each_node_attributes["has_properties"].append(self.get_real_label_name(node_value)) + elif add_all_properties: # add remained properties if need all properties + each_node_attributes["has_properties"].append(self.get_real_label_name(node_property)) # close multiprocess pool if self._parallel_count > 1: From 3e87f3cb2d38b5290ef35b1ab03a898b7ab21218 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Thu, 7 May 2020 13:27:50 -0700 Subject: [PATCH 112/278] Support kgtk ifexists and kgtk ifnotexists. --- kgtk/cli/ifexists.py | 132 +++++++++++++++++++++++++++++++++++++++ kgtk/cli/ifnotexists.py | 134 ++++++++++++++++++++++++++++++++++++++++ kgtk/join/ifexists.py | 41 ++++++++---- 3 files changed, 295 insertions(+), 12 deletions(-) create mode 100644 kgtk/cli/ifexists.py create mode 100644 kgtk/cli/ifnotexists.py diff --git a/kgtk/cli/ifexists.py b/kgtk/cli/ifexists.py new file mode 100644 index 000000000..fdff15e56 --- /dev/null +++ b/kgtk/cli/ifexists.py @@ -0,0 +1,132 @@ +"""Filter a KGTK file based on whether one or more records exist in a second +KGTK file with matching values for one or more fields. 
+""" + +from pathlib import Path +import sys +import typing + +from kgtk.join.enumnameaction import EnumNameAction +from kgtk.join.kgtkformat import KgtkFormat +from kgtk.join.ifexists import IfExists +from kgtk.join.kgtkreader import KgtkReader +from kgtk.join.kgtkwriter import KgtkWriter +from kgtk.join.kgtkvalueoptions import KgtkValueOptions +from kgtk.join.validationaction import ValidationAction + +def parser(): + return { + 'help': 'Filter a KGTK file based on whether one or more records exist in a second KGTK file with matching values for one or more fields.' + } + + +def add_arguments(parser): + """ + Parse arguments + Args: + parser (argparse.ArgumentParser) + """ + parser.add_argument( "input_kgtk_file", nargs="?", help="The KGTK file to filter ('left' file). May be omitted or '-' for stdin.", type=Path) + + parser.add_argument( "--filter-on", dest="filter_kgtk_file", help="The KGTK file to filter against ('right' file).", type=Path, required=True) + + parser.add_argument("-o", "--output-file", dest="output_kgtk_file", help="The KGTK file to write", type=Path, default=None) + + parser.add_argument( "--left-keys", dest="left_keys", help="The key columns in the file being filtered.", nargs='*') + + parser.add_argument( "--right-keys", dest="right_keys", help="The key columns in the filter-on file.", nargs='*') + + + # A subset of common arguments: + errors_to = parser.add_mutually_exclusive_group() + errors_to.add_argument( "--errors-to-stdout", dest="errors_to_stdout", + help="Send errors to stdout instead of stderr (default)", action="store_true") + errors_to.add_argument( "--errors-to-stderr", dest="errors_to_stderr", + help="Send errors to stderr instead of stdout", action="store_true") + + parser.add_argument( "--error-limit", dest="error_limit", + help="The maximum number of errors to report before failing", type=int, default=KgtkReader.ERROR_LIMIT_DEFAULT) + + parser.add_argument( "--field-separator", dest="field_separator", + help="Field separator.", type=str, default=IfExists.FIELD_SEPARATOR_DEFAULT) + + parser.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true') + + parser.add_argument( "--very-verbose", dest="very_verbose", help="Print additional progress messages.", action='store_true') + + + + # Note: Any arguments described by KgtkValueOptions.add_arguments(...) + # need to be included in the arguments to run(...), below. 
+ KgtkValueOptions.add_arguments(parser) + + +def run(input_kgtk_file: typing.Optional[Path], + filter_kgtk_file: Path, + output_kgtk_file: typing.Optional[Path], + left_keys: typing.Optional[typing.List[str]], + right_keys: typing.Optional[typing.List[str]], + + # Some common arguments: + errors_to_stdout: bool = False, + errors_to_stderr: bool = False, + error_limit: int = KgtkReader.ERROR_LIMIT_DEFAULT, + field_separator: str = IfExists.FIELD_SEPARATOR_DEFAULT, + verbose: bool = False, + very_verbose: bool = False, + + # Arguments from KgtkValueOptions: + additional_language_codes: typing.Optional[typing.List[str]] = None, + allow_language_suffixes: bool = False, + allow_lax_strings: bool = False, + allow_lax_lq_strings: bool = False, + allow_month_or_day_zero: bool = False, + repair_month_or_day_zero: bool = False, + minimum_valid_year: int = KgtkValueOptions.MINIMUM_VALID_YEAR, + maximum_valid_year: int = KgtkValueOptions.MAXIMUM_VALID_YEAR, + escape_list_separators: bool = False, + +)->int: + # import modules locally + from kgtk.exceptions import KGTKException + + + if input_kgtk_file is None: + input_kgtk_file = Path("-") + + # Select where to send error messages, defaulting to stderr. + # (Not used yet) + error_file: typing.TextIO = sys.stderr if errors_to_stderr else sys.stdout + + # Build the value parsing option structure. + value_options: KgtkValueOptions = KgtkValueOptions(allow_month_or_day_zero=allow_month_or_day_zero, + repair_month_or_day_zero=repair_month_or_day_zero, + allow_lax_strings=allow_lax_strings, + allow_lax_lq_strings=allow_lax_lq_strings, + allow_language_suffixes=allow_language_suffixes, + additional_language_codes=additional_language_codes, + minimum_valid_year=minimum_valid_year, + maximum_valid_year=maximum_valid_year, + escape_list_separators=escape_list_separators) + + try: + ie: IfExists = IfExists(left_file_path=input_kgtk_file, + right_file_path=filter_kgtk_file, + output_path=output_kgtk_file, + left_keys=left_keys, + right_keys=right_keys, + field_separator=field_separator, + value_options=value_options, + error_limit=error_limit, + verbose=verbose, + very_verbose=very_verbose) + + ie.process() + + return 0 + + except SystemExit as e: + raise KGTKException("Exit requested") + except Exception as e: + raise KGTKException(str(e)) + diff --git a/kgtk/cli/ifnotexists.py b/kgtk/cli/ifnotexists.py new file mode 100644 index 000000000..e9003a9ef --- /dev/null +++ b/kgtk/cli/ifnotexists.py @@ -0,0 +1,134 @@ +"""Filter a KGTK file based on whether one or more records do not exist in a +second KGTK file with matching values for one or more fields. + +""" + +from pathlib import Path +import sys +import typing + +from kgtk.join.enumnameaction import EnumNameAction +from kgtk.join.kgtkformat import KgtkFormat +from kgtk.join.ifexists import IfExists +from kgtk.join.kgtkreader import KgtkReader +from kgtk.join.kgtkwriter import KgtkWriter +from kgtk.join.kgtkvalueoptions import KgtkValueOptions +from kgtk.join.validationaction import ValidationAction + +def parser(): + return { + 'help': 'Filter a KGTK file based on whether one or more records do not exist in a second KGTK file with matching values for one or more fields.' + } + + +def add_arguments(parser): + """ + Parse arguments + Args: + parser (argparse.ArgumentParser) + """ + parser.add_argument( "input_kgtk_file", nargs="?", help="The KGTK file to filter ('left' file). 
May be omitted or '-' for stdin.", type=Path) + + parser.add_argument( "--filter-on", dest="filter_kgtk_file", help="The KGTK file to filter against ('right' file).", type=Path, required=True) + + parser.add_argument("-o", "--output-file", dest="output_kgtk_file", help="The KGTK file to write", type=Path, default=None) + + parser.add_argument( "--left-keys", dest="left_keys", help="The key columns in the file being filtered.", nargs='*') + + parser.add_argument( "--right-keys", dest="right_keys", help="The key columns in the filter-on file.", nargs='*') + + + # A subset of common arguments: + errors_to = parser.add_mutually_exclusive_group() + errors_to.add_argument( "--errors-to-stdout", dest="errors_to_stdout", + help="Send errors to stdout instead of stderr (default)", action="store_true") + errors_to.add_argument( "--errors-to-stderr", dest="errors_to_stderr", + help="Send errors to stderr instead of stdout", action="store_true") + + parser.add_argument( "--error-limit", dest="error_limit", + help="The maximum number of errors to report before failing", type=int, default=KgtkReader.ERROR_LIMIT_DEFAULT) + + parser.add_argument( "--field-separator", dest="field_separator", + help="Field separator.", type=str, default=IfExists.FIELD_SEPARATOR_DEFAULT) + + parser.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true') + + parser.add_argument( "--very-verbose", dest="very_verbose", help="Print additional progress messages.", action='store_true') + + + + # Note: Any arguments described by KgtkValueOptions.add_arguments(...) + # need to be included in the arguments to run(...), below. + KgtkValueOptions.add_arguments(parser) + + +def run(input_kgtk_file: typing.Optional[Path], + filter_kgtk_file: Path, + output_kgtk_file: typing.Optional[Path], + left_keys: typing.Optional[typing.List[str]], + right_keys: typing.Optional[typing.List[str]], + + # Some common arguments: + errors_to_stdout: bool = False, + errors_to_stderr: bool = False, + error_limit: int = KgtkReader.ERROR_LIMIT_DEFAULT, + field_separator: str = IfExists.FIELD_SEPARATOR_DEFAULT, + verbose: bool = False, + very_verbose: bool = False, + + # Arguments from KgtkValueOptions: + additional_language_codes: typing.Optional[typing.List[str]] = None, + allow_language_suffixes: bool = False, + allow_lax_strings: bool = False, + allow_lax_lq_strings: bool = False, + allow_month_or_day_zero: bool = False, + repair_month_or_day_zero: bool = False, + minimum_valid_year: int = KgtkValueOptions.MINIMUM_VALID_YEAR, + maximum_valid_year: int = KgtkValueOptions.MAXIMUM_VALID_YEAR, + escape_list_separators: bool = False, + +)->int: + # import modules locally + from kgtk.exceptions import KGTKException + + + if input_kgtk_file is None: + input_kgtk_file = Path("-") + + # Select where to send error messages, defaulting to stderr. + # (Not used yet) + error_file: typing.TextIO = sys.stderr if errors_to_stderr else sys.stdout + + # Build the value parsing option structure. 
+ value_options: KgtkValueOptions = KgtkValueOptions(allow_month_or_day_zero=allow_month_or_day_zero, + repair_month_or_day_zero=repair_month_or_day_zero, + allow_lax_strings=allow_lax_strings, + allow_lax_lq_strings=allow_lax_lq_strings, + allow_language_suffixes=allow_language_suffixes, + additional_language_codes=additional_language_codes, + minimum_valid_year=minimum_valid_year, + maximum_valid_year=maximum_valid_year, + escape_list_separators=escape_list_separators) + + try: + ie: IfExists = IfExists(left_file_path=input_kgtk_file, + right_file_path=filter_kgtk_file, + output_path=output_kgtk_file, + invert=True, + left_keys=left_keys, + right_keys=right_keys, + field_separator=field_separator, + value_options=value_options, + error_limit=error_limit, + verbose=verbose, + very_verbose=very_verbose) + + ie.process() + + return 0 + + except SystemExit as e: + raise KGTKException("Exit requested") + except Exception as e: + raise KGTKException(str(e)) + diff --git a/kgtk/join/ifexists.py b/kgtk/join/ifexists.py index c5ce50257..beea083b6 100644 --- a/kgtk/join/ifexists.py +++ b/kgtk/join/ifexists.py @@ -37,6 +37,8 @@ class IfExists(KgtkFormat): right_file_path: Path = attr.ib(validator=attr.validators.instance_of(Path)) output_path: typing.Optional[Path] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(Path))) + invert: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) + left_keys: typing.Optional[typing.List[str]] = attr.ib(validator=attr.validators.optional(attr.validators.deep_iterable(member_validator=attr.validators.instance_of(str), iterable_validator=attr.validators.instance_of(list))), default=None) @@ -113,8 +115,13 @@ def get_key_columns(self, supplied_keys: typing.Optional[typing.List[str]], kr: def build_key(self, row: typing.List[str], key_columns: typing.List[int])->str: key: str = "" idx: int + first: bool = True for idx in key_columns: - key += self.field_separator+ row[idx] + if first: + first = False + else: + key += self.field_separator + key += row[idx] return key def extract_key_set(self, kr: KgtkReader, who: str, key_columns: typing.List[int])->typing.Set[str]: @@ -162,8 +169,10 @@ def process(self): if self.verbose: print("Building the input key set from %s" % self.right_file_path, flush=True) key_set: typint.Set[str] = self.extract_key_set(right_kr, "right", right_key_columns) - if self.verbose: + if self.verbose or self.very_verbose: print("There are %d entries in the key set." % len(key_set)) + if self.very_verbose: + print("Keys: %s" % " ".join(key_set)) if self.verbose: print("Opening the output file: %s" % self.output_path, flush=True) @@ -185,23 +194,29 @@ def process(self): for row in left_kr: input_line_count += 1 left_key: str = self.build_key(row, left_key_columns) - if left_key in key_set: - ew.write(row) - output_line_count += 1 - ew.close() + if self.invert: + if left_key not in key_set: + ew.write(row) + output_line_count += 1 + else: + if left_key in key_set: + ew.write(row) + output_line_count += 1 if self.verbose: - print("Read %d records, write %d records." % (input_line_count, output_line_count)) + print("Read %d records, wrote %d records." % (input_line_count, output_line_count), flush=True) + ew.close() + def main(): """ Test the KGTK file joiner. 
""" parser = ArgumentParser() - parser.add_argument(dest="left_file_path", help="The left KGTK file to join", type=Path) + parser.add_argument(dest="left_kgtk_file", help="The left KGTK file to join", type=Path) - parser.add_argument(dest="right_file_path", help="The right KGTK file to join", type=Path) + parser.add_argument(dest="right_kgtk_file", help="The right KGTK file to join", type=Path) parser.add_argument( "--error-limit", dest="error_limit", help="The maximum number of errors to report before failing", type=int, default=KgtkReader.ERROR_LIMIT_DEFAULT) @@ -213,6 +228,8 @@ def main(): parser.add_argument( "--gzip-in-parallel", dest="gzip_in_parallel", help="Execute gzip in parallel.", action='store_true') + parser.add_argument( "--invert", dest="invert", help="Invert the test (if not exists).", action='store_true') + parser.add_argument( "--left-keys", dest="left_keys", help="The key columns in the left file.", nargs='*') parser.add_argument( "--long-line-action", dest="long_line_action", @@ -241,9 +258,10 @@ def main(): # Build the value parsing option structure. value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) - ie: IfExists = IfExists(left_file_path=args.left_file_path, - right_file_path=args.right_file_path, + ie: IfExists = IfExists(left_file_path=args.left_kgtk_file, + right_file_path=args.right_kgtk_file, output_path=args.output_file_path, + invert=args.invert, left_keys=args.left_keys, right_keys=args.right_keys, field_separator=args.field_separator, @@ -261,4 +279,3 @@ def main(): if __name__ == "__main__": main() - From 33b2c91dc28707877702e39ea52c12baaf1e18a9 Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Thu, 7 May 2020 14:33:38 -0700 Subject: [PATCH 113/278] added a readme --- kgtk/cli/generate_wikidata_triples.md | 114 ++++++++++++++++++++++++++ kgtk/cli/generate_wikidata_triples.py | 2 +- 2 files changed, 115 insertions(+), 1 deletion(-) create mode 100644 kgtk/cli/generate_wikidata_triples.md diff --git a/kgtk/cli/generate_wikidata_triples.md b/kgtk/cli/generate_wikidata_triples.md new file mode 100644 index 000000000..984db2541 --- /dev/null +++ b/kgtk/cli/generate_wikidata_triples.md @@ -0,0 +1,114 @@ +## The generate_wikidata_triples command converts a kgtk file to a ttl file that can be loaded into a wikidata Blazegraph. + +The triple generator take a tab-separated kgtk file from standard input. +``` +node1 property node2 id +Q2140726727_mag_author P6366 2140726727 id1 +Q2140726727_mag_author label Zunyou Wu@en id2 +Q2140726727_mag_author P1416 Q184490438_mag_affiliation id3 +Q184490438_mag_affiliation label Chinese Center For Disease Control And Prevention@en id4 +``` +to an rdf file like this. + +``` +rdfs:label "Zunyou Wu"@en ; +schema:name "Zunyou Wu"@en ; +skos:prefLabel "Zunyou Wu"@en ; +p:P1416 wds:Q2140726727_mag_author-abcdefg ; +p:P6366 wds:Q2140726727_mag_author-abcdefg ; +wdt:P1416 wd:Q184490438_mag_affiliation ; +wdt:P6366 "2140726727"^^xsd:string . + +``` + + +## Required Option + +- `--pf --property-types {path}`: path to the file which contains the property datatype mapping in kgtk format. + +## Optional Options + +- `-lp --label-property {str}`: property identifiers which will create labels, separated by comma','. Default to **label**. +- `-ap --alias-property {str}`: alias identifiers which will create labels, separated by comma','. Default to **aliases**. +- `-dp --description-property {str}`: description identifiers which will create labels, separated by comma','. Default to **descriptions**. 
- `-gt --generate-truthy {bool}`: whether to generate truthy triples. Default to **Yes**.
- `-ig --ignore {bool}`: if set to yes, ignore various kinds of exceptions and mistakes, logging them to a log file with the line number in the input file, rather than stopping. Default to **False**.
- `-n --output-n-lines {number}`: output triples approximately every {n} lines of reading stdin. Default to **1000**.
- `-gz --use-gz {bool}`: if set to yes, read from a compressed gz file. Default to **False**.
- `-sid --use-id {bool}`: if set to yes, the id in the edge will be used as the statement id when creating a statement or truthy statement. Default to **False**

## Shared Options

- `--debug`: run the command in debug mode.

### property-types

**--property-types** is the most important input file. It is also a kgtk file. Here is an example file `example_prop.tsv`:

```
node1 label node2
P493 property_type external-identifier
P494 property_type external-identifier
P495 property_type item
P496 property_type external-identifier
P497 property_type external-identifier
P498 property_type external-identifier
P500 property_type item
P501 property_type item
P502 property_type string
```
The header line is necessary. If property *P493* is used in the input kgtk file, then the edge `P493 property_type external-identifier` must exist in `example_prop.tsv` to tell the triple generator that the object of `P493` is an external-identifier. If `P495` is used in the input kgtk file, then the object of `P495` will be treated as an entity.

### label, aliases and descriptions

**-lp**, **-ap**, and **-dp** define how you want the triple generator to identify labels, descriptions, and aliases.

For example, if you have `-ap aliases,alias`, then when the following edges are met, both `Alice` and `Alicia` will be treated as aliases of the node `q2020`.

```
node1 property node2 id
q2020 aliases Alice@en id1
q2020 alias Alicia@sp id2
```

### truthy

If `-gt --generate-truthy` is set to `True`, the statements will be truthy. Truthy statements have an additional SPO triple with the property prefix `wdt`.

### ignore

`ignore` allows you to ignore various kinds of errors, which are written to the `ignore.log` file.

### n

`n` controls how many lines of standard input are read before triples are serialized. To achieve optimal performance, you can set `n` larger to reduce the overhead of creating knowledge graph objects and frequent serialization. However, a larger `n` also requires more memory.

### gz

Use a compressed file as input.

### use-id

If `--use-id` is set to true, the `id` column of the kgtk file will be used as the statement id if the corresponding edge is a statement edge. It is then the user's responsibility to make sure there are no duplicate statement ids across the whole knowledge graph.

## Usage

### Standard Usage

```bash
kgtk generate_wikidata_triples -pf example_prop.tsv < input_file.tsv > output_file.ttl
```

### Run in parallel

You can split the input file into several smaller pieces and run the command on them simultaneously.

Let's say you are in a directory which contains the `tsv` files. The following command will generate the `ttl` files with the same file names. 
diff --git a/kgtk/cli/generate_wikidata_triples.py b/kgtk/cli/generate_wikidata_triples.py
index cdc8b44ad..cc21f5c2a 100644
--- a/kgtk/cli/generate_wikidata_triples.py
+++ b/kgtk/cli/generate_wikidata_triples.py
@@ -89,7 +89,7 @@ def add_arguments(parser):
         type=str2bool,
         required = False,
         default="yes",
-        help="the default is to not generate truthy triples. Specify this option to generate truthy triples. NOTIMPLEMENTED",
+        help="the default is to not generate truthy triples. Specify this option to generate truthy triples.",
         dest="truthy",
     )
     parser.add_argument(

From c6348bf538bf13ca4a4e5dae049d2a914876c724 Mon Sep 17 00:00:00 2001
From: Craig Milo Rogers
Date: Thu, 7 May 2020 15:06:11 -0700
Subject: [PATCH 114/278] Put the value option arguments in one group.

---
 kgtk/join/kgtkvalueoptions.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/kgtk/join/kgtkvalueoptions.py b/kgtk/join/kgtkvalueoptions.py
index b061b92e3..fddda19e7 100644
--- a/kgtk/join/kgtkvalueoptions.py
+++ b/kgtk/join/kgtkvalueoptions.py
@@ -59,51 +59,52 @@ class KgtkValueOptions:

     @classmethod
     def add_arguments(cls, parser: ArgumentParser):
-        parser.add_argument( "--additional-language-codes", dest="additional_language_codes",
+        vgroup = parser.add_argument_group("KgtkValueOptions", "Options controlling the parsing and repair of KGTK data values.")
+        vgroup.add_argument( "--additional-language-codes", dest="additional_language_codes",
                              help="Additional language codes.", nargs="*", default=None)

-        lsgroup= parser.add_mutually_exclusive_group()
+        lsgroup= vgroup.add_mutually_exclusive_group()
         lsgroup.add_argument( "--allow-language-suffixes", dest="allow_language_suffixes",
                               help="Allow language identifier suffixes starting with a dash.", action='store_true', default=True)

         lsgroup.add_argument( "--disallow-language-suffixes", dest="allow_language_suffixes",
                               help="Disallow language identifier suffixes starting with a dash.", action='store_false')

-        laxgroup= parser.add_mutually_exclusive_group()
+        laxgroup= vgroup.add_mutually_exclusive_group()
         laxgroup.add_argument( "--allow-lax-strings", dest="allow_lax_strings",
                                help="Do not check if double quotes are backslashed inside strings.", action='store_true', default=False)

         laxgroup.add_argument( "--disallow-lax-strings", dest="allow_lax_strings",
                                help="Check if double quotes are backslashed inside strings.", action='store_false')

-        lqgroup= parser.add_mutually_exclusive_group()
+        lqgroup= vgroup.add_mutually_exclusive_group()
         lqgroup.add_argument( "--allow-lax-lq-strings", dest="allow_lax_lq_strings",
                               help="Do not check if single quotes are backslashed inside language qualified strings.", action='store_true', default=False)

         lqgroup.add_argument( "--disallow-lax-lq-strings", dest="allow_lax_lq_strings",
                               help="Check if single quotes are backslashed inside language qualified strings.", action='store_false')

-        amd0group= parser.add_mutually_exclusive_group()
+        amd0group= vgroup.add_mutually_exclusive_group()
         amd0group.add_argument( "--allow-month-or-day-zero", dest="allow_month_or_day_zero",
                                 help="Allow month or day zero in dates.", action='store_true', default=False)

         amd0group.add_argument( "--disallow-month-or-day-zero", dest="allow_month_or_day_zero",
                                 help="Allow month or day zero in dates.", action='store_false')

-        rmd0group= parser.add_mutually_exclusive_group()
+        rmd0group= 
vgroup.add_mutually_exclusive_group() rmd0group.add_argument( "--repair-month-or-day-zero", dest="repair_month_or_day_zero", help="Repair month or day zero in dates.", action='store_true', default=False) rmd0group.add_argument( "--no-repair-month-or-day-zero", dest="repair_month_or_day_zero", help="Do not repair month or day zero in dates.", action='store_false') - parser.add_argument( "--minimum-valid-year", dest="minimum_valid_year", + vgroup.add_argument( "--minimum-valid-year", dest="minimum_valid_year", help="The minimum valid year in dates.", type=int, default=cls.MINIMUM_VALID_YEAR) - parser.add_argument( "--maximum-valid-year", dest="maximum_valid_year", + vgroup.add_argument( "--maximum-valid-year", dest="maximum_valid_year", help="The maximum valid year in dates.", type=int, default=cls.MAXIMUM_VALID_YEAR) - elsgroup= parser.add_mutually_exclusive_group() + elsgroup= vgroup.add_mutually_exclusive_group() elsgroup.add_argument( "--escape-list-separators", dest="escape_list_separators", help="Escape all list separators instead of splitting on them.", action='store_true', default=False) From 521bb10c0bd01bfafa6c42f0e9d4b63304834b6a Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Thu, 7 May 2020 16:29:46 -0700 Subject: [PATCH 115/278] Provide a better organization for the arguments. --- kgtk/join/edgereader.py | 6 +- kgtk/join/kgtkreader.py | 106 +++++++++++++++++++--------------- kgtk/join/kgtkvalueoptions.py | 2 +- kgtk/join/nodereader.py | 6 +- 4 files changed, 67 insertions(+), 53 deletions(-) diff --git a/kgtk/join/edgereader.py b/kgtk/join/edgereader.py index d4d343148..3fe378fa0 100644 --- a/kgtk/join/edgereader.py +++ b/kgtk/join/edgereader.py @@ -149,7 +149,6 @@ def _skip_reserved_fields(self, column_name)->bool: @classmethod def add_arguments(cls, parser: ArgumentParser): - # super().add_arguments(parser) parser.add_argument( "--blank-node1-line-action", dest="blank_node1_line_action", help="The action to take when a blank node1 field is detected.", type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) @@ -163,8 +162,9 @@ def main(): Test the KGTK edge file reader. 
""" parser = ArgumentParser() - KgtkReader.add_shared_arguments(parser) - EdgeReader.add_arguments(parser) + KgtkReader.add_operation_arguments(parser) + (fgroup, hgroup, lgroup) = KgtkReader.add_shared_arguments(parser) + EdgeReader.add_arguments(lgroup) KgtkValueOptions.add_arguments(parser) args = parser.parse_args() diff --git a/kgtk/join/kgtkreader.py b/kgtk/join/kgtkreader.py index 1b2c321e8..b694e9e8e 100644 --- a/kgtk/join/kgtkreader.py +++ b/kgtk/join/kgtkreader.py @@ -16,7 +16,7 @@ """ -from argparse import ArgumentParser +from argparse import ArgumentParser, _ArgumentGroup import attr import bz2 from enum import Enum @@ -822,74 +822,87 @@ def merge_columns(self, additional_columns: typing.List[str])->typing.List[str]: return merged_columns @classmethod - def add_shared_arguments(cls, parser: ArgumentParser): - parser.add_argument(dest="kgtk_file", help="The KGTK file to read", type=Path, nargs="?") + def add_operation_arguments(cls, parser: ArgumentParser): + errors_to = parser.add_mutually_exclusive_group() + errors_to.add_argument( "--errors-to-stdout", dest="errors_to_stdout", + help="Send errors to stdout instead of stderr", action="store_true") + errors_to.add_argument( "--errors-to-stderr", dest="errors_to_stderr", + help="Send errors to stderr instead of stdout", action="store_true") - parser.add_argument( "--blank-required-field-line-action", dest="blank_line_action", - help="The action to take when a line with a blank node1, node2, or id field (per mode) is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - parser.add_argument( "--comment-line-action", dest="comment_line_action", - help="The action to take when a comment line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - parser.add_argument( "--column-separator", dest="column_separator", - help="Column separator.", type=str, default=cls.COLUMN_SEPARATOR) + parser.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true') - parser.add_argument( "--compression-type", dest="compression_type", help="Specify the compression type.") + parser.add_argument( "--very-verbose", dest="very_verbose", help="Print additional progress messages.", action='store_true') + + @classmethod + def add_shared_arguments(cls, parser: ArgumentParser)->typing.Tuple[_ArgumentGroup, _ArgumentGroup, _ArgumentGroup]: + parser.add_argument(dest="kgtk_file", help="The KGTK file to read", type=Path, nargs="?") - parser.add_argument( "--empty-line-action", dest="empty_line_action", - help="The action to take when an empty line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + fgroup: _ArgumentGroup = parser.add_argument_group("File options", "Options affecting file processing") + fgroup.add_argument( "--column-separator", dest="column_separator", + help="Column separator.", type=str, default=cls.COLUMN_SEPARATOR) - parser.add_argument( "--errors-to-stdout", dest="errors_to_stdout", - help="Send errors to stdout instead of stderr", action="store_true") + fgroup.add_argument( "--compression-type", dest="compression_type", help="Specify the compression type.") - parser.add_argument( "--error-limit", dest="error_limit", + fgroup.add_argument( "--error-limit", dest="error_limit", help="The maximum number of errors to report before failing", type=int, default=cls.ERROR_LIMIT_DEFAULT) - parser.add_argument( "--fill-short-lines", dest="fill_short_lines", - 
help="Fill missing trailing columns in short lines with empty values.", action='store_true') + fgroup.add_argument( "--gzip-in-parallel", dest="gzip_in_parallel", help="Execute gzip in parallel.", action='store_true') - parser.add_argument( "--force-column-names", dest="force_column_names", help="Force the column names.", nargs='+') + fgroup.add_argument( "--gzip-queue-size", dest="gzip_queue_size", + help="Queue size for parallel gzip.", type=int, default=cls.GZIP_QUEUE_SIZE_DEFAULT) - parser.add_argument( "--gzip-in-parallel", dest="gzip_in_parallel", help="Execute gzip in parallel.", action='store_true') + hgroup: _ArgumentGroup = parser.add_argument_group("Header parsing", "Options affecting header parsing") - parser.add_argument( "--gzip-queue-size", dest="gzip_queue_size", - help="Queue size for parallel gzip.", type=int, default=cls.GZIP_QUEUE_SIZE_DEFAULT) + hgroup.add_argument( "--force-column-names", dest="force_column_names", help="Force the column names.", nargs='+') - parser.add_argument( "--header-error-action", dest="header_error_action", + hgroup.add_argument( "--header-error-action", dest="header_error_action", help="The action to take when a header error is detected Only ERROR or EXIT are supported.", type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXIT) - parser.add_argument( "--invalid-value-action", dest="invalid_value_action", - help="The action to take when a data cell value is invalid.", + hgroup.add_argument( "--skip-first-record", dest="skip_first_record", + help="Skip the first record when forcing column names.", action='store_true') + + hgroup.add_argument( "--unsafe-column-name-action", dest="unsafe_column_name_action", + help="The action to take when a column name is unsafe.", type=ValidationAction, action=EnumNameAction, default=ValidationAction.REPORT) - parser.add_argument( "--long-line-action", dest="long_line_action", - help="The action to take when a long line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + lgroup: _ArgumentGroup = parser.add_argument_group("Line parsing", "Options affecting data line parsing") - parser.add_argument( "--short-line-action", dest="short_line_action", - help="The action to take when a short line is detected.", + lgroup.add_argument( "--blank-required-field-line-action", dest="blank_line_action", + help="The action to take when a line with a blank node1, node2, or id field (per mode) is detected.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + + lgroup.add_argument( "--comment-line-action", dest="comment_line_action", + help="The action to take when a comment line is detected.", type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - parser.add_argument( "--skip-first-record", dest="skip_first_record", help="Skip the first record when forcing column names.", action='store_true') + lgroup.add_argument( "--empty-line-action", dest="empty_line_action", + help="The action to take when an empty line is detected.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - parser.add_argument( "--truncate-long-lines", dest="truncate_long_lines", - help="Remove excess trailing columns in long lines.", action='store_true') + lgroup.add_argument( "--fill-short-lines", dest="fill_short_lines", + help="Fill missing trailing columns in short lines with empty values.", action='store_true') - parser.add_argument( "--unsafe-column-name-action", dest="unsafe_column_name_action", - 
help="The action to take when a column name is unsafe.", + lgroup.add_argument( "--invalid-value-action", dest="invalid_value_action", + help="The action to take when a data cell value is invalid.", type=ValidationAction, action=EnumNameAction, default=ValidationAction.REPORT) - parser.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true') + lgroup.add_argument( "--long-line-action", dest="long_line_action", + help="The action to take when a long line is detected.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - parser.add_argument( "--very-verbose", dest="very_verbose", help="Print additional progress messages.", action='store_true') + lgroup.add_argument( "--short-line-action", dest="short_line_action", + help="The action to take when a short line is detected.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - parser.add_argument( "--whitespace-line-action", dest="whitespace_line_action", + lgroup.add_argument( "--truncate-long-lines", dest="truncate_long_lines", + help="Remove excess trailing columns in long lines.", action='store_true') + + lgroup.add_argument( "--whitespace-line-action", dest="whitespace_line_action", help="The action to take when a whitespace line is detected.", type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + + return (fgroup, hgroup, lgroup) # May be overridden @classmethod @@ -909,10 +922,11 @@ def main(): from kgtk.join.nodereader import NodeReader parser = ArgumentParser() - KgtkReader.add_shared_arguments(parser) - KgtkReader.add_arguments(parser) - EdgeReader.add_arguments(parser) - NodeReader.add_arguments(parser) + KgtkReader.add_operation_arguments(parser) + (fgroup, hgroup, lgroup) = KgtkReader.add_shared_arguments(parser) + KgtkReader.add_arguments(fgroup) + EdgeReader.add_arguments(lgroup) + NodeReader.add_arguments(lgroup) KgtkValueOptions.add_arguments(parser) parser.add_argument( "--test", dest="test_method", help="The test to perform", diff --git a/kgtk/join/kgtkvalueoptions.py b/kgtk/join/kgtkvalueoptions.py index fddda19e7..767f2d199 100644 --- a/kgtk/join/kgtkvalueoptions.py +++ b/kgtk/join/kgtkvalueoptions.py @@ -59,7 +59,7 @@ class KgtkValueOptions: @classmethod def add_arguments(cls, parser: ArgumentParser): - vgroup = parser.add_argument_group("KgtkValueOptions", "Options controlling the parsing and repair of KGTK data values.") + vgroup = parser.add_argument_group("Data value parsing", "Options controlling the parsing and processing of KGTK data values.") vgroup.add_argument( "--additional-language-codes", dest="additional_language_codes", help="Additional language codes.", nargs="*", default=None) diff --git a/kgtk/join/nodereader.py b/kgtk/join/nodereader.py index 4f4189a6f..9fb88d260 100644 --- a/kgtk/join/nodereader.py +++ b/kgtk/join/nodereader.py @@ -130,7 +130,6 @@ def _skip_reserved_fields(self, column_name)->bool: @classmethod def add_arguments(cls, parser: ArgumentParser): - # super().add_arguments(parser) parser.add_argument( "--blank-id-line-action", dest="blank_id_line_action", help="The action to take when a blank id field is detected.", type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) @@ -141,8 +140,9 @@ def main(): Test the KGTK node file reader. 
""" parser = ArgumentParser() - KgtkReader.add_shared_arguments(parser) - NodeReader.add_arguments(parser) + KgtkReader.add_operation_arguments(parser) + (fgroup, hgroup, lgroup) = KgtkReader.add_shared_arguments(parser) + NodeReader.add_arguments(lgroup) KgtkValueOptions.add_arguments(parser) args = parser.parse_args() From 80f2f94277c5749acd4005022c0e5676bdb4adfd Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Thu, 7 May 2020 16:57:54 -0700 Subject: [PATCH 116/278] Semicolons are frequently in column names, disable the check. --- kgtk/join/kgtkbase.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kgtk/join/kgtkbase.py b/kgtk/join/kgtkbase.py index 7f6ed4afd..2299725d2 100644 --- a/kgtk/join/kgtkbase.py +++ b/kgtk/join/kgtkbase.py @@ -75,7 +75,7 @@ def check_column_name(cls, # 1) except inside "" and '' quoted strings # 4) Check for commas # 5) Check for vertical bars - # 6) Check for semicolons + # 6) Check for semicolons (disabled) # # TODO: It might be possible to make some of these checks more efficient. results: typing.List[str] = [ ] @@ -90,8 +90,8 @@ def check_column_name(cls, results.append("Warning: Column name '%s' contains a comma (,)" % column_name) if "|" in column_name: results.append("Warning: Column name '%s' contains a vertical bar (|)" % column_name) - if ";" in column_name: - results.append("Warning: Column name '%s' contains a semicolon (;)" % column_name) + # if ";" in column_name: + # results.append("Warning: Column name '%s' contains a semicolon (;)" % column_name) kv: KgtkValue = KgtkValue(column_name) if not kv.is_valid(): results.append(kv.describe()) From a46eda01a01cabba1b2c4acce789f9c382107e39 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Thu, 7 May 2020 16:58:33 -0700 Subject: [PATCH 117/278] Plumb control over value checking into ifexists. 
---
 kgtk/cli/ifexists.py  | 1 +
 kgtk/join/ifexists.py | 8 ++++++++
 2 files changed, 9 insertions(+)

diff --git a/kgtk/cli/ifexists.py b/kgtk/cli/ifexists.py
index fdff15e56..a1e288b25 100644
--- a/kgtk/cli/ifexists.py
+++ b/kgtk/cli/ifexists.py
@@ -116,6 +116,7 @@ def run(input_kgtk_file: typing.Optional[Path],
                        left_keys=left_keys,
                        right_keys=right_keys,
                        field_separator=field_separator,
+                       invalid_value_action=ValidationAction.PASS,
                        value_options=value_options,
                        error_limit=error_limit,
                        verbose=verbose,
diff --git a/kgtk/join/ifexists.py b/kgtk/join/ifexists.py
index beea083b6..0cab4735b 100644
--- a/kgtk/join/ifexists.py
+++ b/kgtk/join/ifexists.py
@@ -57,6 +57,7 @@ class IfExists(KgtkFormat):
     fill_short_lines: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False)
     truncate_long_lines: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False)

+    invalid_value_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.PASS)
     # TODO: find a working validator
     # value_options: typing.Optional[KgtkValueOptions] = attr.ib(attr.validators.optional(attr.validators.instance_of(KgtkValueOptions)), default=None)
     value_options: typing.Optional[KgtkValueOptions] = attr.ib(default=None)
@@ -140,6 +141,7 @@ def process(self):
                                               long_line_action=self.long_line_action,
                                               fill_short_lines=self.fill_short_lines,
                                               truncate_long_lines=self.truncate_long_lines,
+                                              invalid_value_action=self.invalid_value_action,
                                               value_options = self.value_options,
                                               error_limit=self.error_limit,
                                               verbose=self.verbose,
@@ -153,6 +155,7 @@
                                               long_line_action=self.long_line_action,
                                               fill_short_lines=self.fill_short_lines,
                                               truncate_long_lines=self.truncate_long_lines,
+                                              invalid_value_action=self.invalid_value_action,
                                               value_options = self.value_options,
                                               error_limit=self.error_limit,
                                               verbose=self.verbose,
@@ -228,6 +231,10 @@ def main():
     parser.add_argument( "--gzip-in-parallel", dest="gzip_in_parallel", help="Execute gzip in parallel.", action='store_true')

+    parser.add_argument( "--invalid-value-action", dest="invalid_value_action",
+                         help="The action to take when an invalid data value is detected.",
+                         type=ValidationAction, action=EnumNameAction, default=ValidationAction.PASS)
+
     parser.add_argument( "--invert", dest="invert", help="Invert the test (if not exists).", action='store_true')

     parser.add_argument( "--left-keys", dest="left_keys", help="The key columns in the left file.", nargs='*')
@@ -269,6 +276,7 @@ def main():
                             long_line_action=args.long_line_action,
                             fill_short_lines=args.fill_short_lines,
                             truncate_long_lines=args.truncate_long_lines,
+                            invalid_value_action=args.invalid_value_action,
                             value_options=value_options,
                             gzip_in_parallel=args.gzip_in_parallel,
                             error_limit=args.error_limit,

From 8b697e46b65b46ecb22c1b3032e3d00255a6fd84 Mon Sep 17 00:00:00 2001
From: Craig Milo Rogers
Date: Thu, 7 May 2020 19:51:45 -0700
Subject: [PATCH 118/278] Add option prefixes. Share options differently.
--- kgtk/join/edgereader.py | 13 +-- kgtk/join/kgtkreader.py | 183 ++++++++++++++++++++++++---------------- kgtk/join/nodereader.py | 10 +-- 3 files changed, 112 insertions(+), 94 deletions(-) diff --git a/kgtk/join/edgereader.py b/kgtk/join/edgereader.py index 3fe378fa0..93c25799b 100644 --- a/kgtk/join/edgereader.py +++ b/kgtk/join/edgereader.py @@ -147,24 +147,13 @@ def _skip_reserved_fields(self, column_name)->bool: return True return False - @classmethod - def add_arguments(cls, parser: ArgumentParser): - parser.add_argument( "--blank-node1-line-action", dest="blank_node1_line_action", - help="The action to take when a blank node1 field is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - parser.add_argument( "--blank-node2-line-action", dest="blank_node2_line_action", - help="The action to take when a blank node2 field is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - def main(): """ Test the KGTK edge file reader. """ parser = ArgumentParser() KgtkReader.add_operation_arguments(parser) - (fgroup, hgroup, lgroup) = KgtkReader.add_shared_arguments(parser) - EdgeReader.add_arguments(lgroup) + KgtkReader.add_arguments(parser, edge_options=True) KgtkValueOptions.add_arguments(parser) args = parser.parse_args() diff --git a/kgtk/join/kgtkreader.py b/kgtk/join/kgtkreader.py index b694e9e8e..d79d20c9a 100644 --- a/kgtk/join/kgtkreader.py +++ b/kgtk/join/kgtkreader.py @@ -834,83 +834,123 @@ def add_operation_arguments(cls, parser: ArgumentParser): parser.add_argument( "--very-verbose", dest="very_verbose", help="Print additional progress messages.", action='store_true') @classmethod - def add_shared_arguments(cls, parser: ArgumentParser)->typing.Tuple[_ArgumentGroup, _ArgumentGroup, _ArgumentGroup]: - parser.add_argument(dest="kgtk_file", help="The KGTK file to read", type=Path, nargs="?") - - fgroup: _ArgumentGroup = parser.add_argument_group("File options", "Options affecting file processing") - fgroup.add_argument( "--column-separator", dest="column_separator", - help="Column separator.", type=str, default=cls.COLUMN_SEPARATOR) - - fgroup.add_argument( "--compression-type", dest="compression_type", help="Specify the compression type.") - - fgroup.add_argument( "--error-limit", dest="error_limit", - help="The maximum number of errors to report before failing", type=int, default=cls.ERROR_LIMIT_DEFAULT) - - fgroup.add_argument( "--gzip-in-parallel", dest="gzip_in_parallel", help="Execute gzip in parallel.", action='store_true') - - fgroup.add_argument( "--gzip-queue-size", dest="gzip_queue_size", - help="Queue size for parallel gzip.", type=int, default=cls.GZIP_QUEUE_SIZE_DEFAULT) - - hgroup: _ArgumentGroup = parser.add_argument_group("Header parsing", "Options affecting header parsing") + def add_arguments(cls, + parser: ArgumentParser, + node_options: bool = False, + edge_options: bool = False, + mode_options: bool = False, + who: str = ""): + prefix1: str = "--" if len(who) == 0 else "--" + who + "-" + prefix2: str = "" if len(who) == 0 else who + "_" + prefix3: str = "" if len(who) == 0 else who + " " + + parser.add_argument(dest=prefix2 + "kgtk_file", help="The KGTK file to read", type=Path, nargs="?") + + fgroup: _ArgumentGroup = parser.add_argument_group(prefix3 + "File options", + "Options affecting " + prefix3 + "processing") + fgroup.add_argument(prefix1 + "column-separator", + dest=prefix2 + "column_separator", + help="Column separator.", type=str, default=cls.COLUMN_SEPARATOR) + + 
fgroup.add_argument(prefix1 + "compression-type", + dest=prefix2 + "compression_type", help="Specify the compression type.") + + fgroup.add_argument(prefix1 + "error-limit", + dest=prefix2 + "error_limit", + help="The maximum number of errors to report before failing", type=int, default=cls.ERROR_LIMIT_DEFAULT) + + fgroup.add_argument(prefix1 + "gzip-in-parallel", + dest=prefix2 + "gzip_in_parallel", help="Execute gzip in parallel.", action='store_true') + + fgroup.add_argument(prefix1 + "gzip-queue-size", + dest=prefix2 + "gzip_queue_size", + help="Queue size for parallel gzip.", type=int, default=cls.GZIP_QUEUE_SIZE_DEFAULT) + + if mode_options: + fgroup.add_argument(prefix1 + "mode", + dest=prefix2 + "mode", + help="Determine the KGTK file mode.", + type=KgtkReader.Mode, action=EnumNameAction, default=KgtkReader.Mode.AUTO) + + hgroup: _ArgumentGroup = parser.add_argument_group(prefix3 + "Header parsing", "Options affecting header parsing") - hgroup.add_argument( "--force-column-names", dest="force_column_names", help="Force the column names.", nargs='+') + hgroup.add_argument(prefix1 + "force-column-names", + dest=prefix2 + "force_column_names", help="Force the column names.", nargs='+') - hgroup.add_argument( "--header-error-action", dest="header_error_action", - help="The action to take when a header error is detected Only ERROR or EXIT are supported.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXIT) + hgroup.add_argument(prefix1 + "header-error-action", + dest=prefix2 + "header_error_action", + help="The action to take when a header error is detected Only ERROR or EXIT are supported.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXIT) - hgroup.add_argument( "--skip-first-record", dest="skip_first_record", - help="Skip the first record when forcing column names.", action='store_true') + hgroup.add_argument(prefix1 + "skip-first-record", + dest=prefix2 + "skip_first_record", + help="Skip the first record when forcing column names.", action='store_true') - hgroup.add_argument( "--unsafe-column-name-action", dest="unsafe_column_name_action", - help="The action to take when a column name is unsafe.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.REPORT) + hgroup.add_argument(prefix1 + "unsafe-column-name-action", + dest=prefix2 + "unsafe_column_name_action", + help="The action to take when a column name is unsafe.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.REPORT) lgroup: _ArgumentGroup = parser.add_argument_group("Line parsing", "Options affecting data line parsing") - lgroup.add_argument( "--blank-required-field-line-action", dest="blank_line_action", - help="The action to take when a line with a blank node1, node2, or id field (per mode) is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - lgroup.add_argument( "--comment-line-action", dest="comment_line_action", - help="The action to take when a comment line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - lgroup.add_argument( "--empty-line-action", dest="empty_line_action", - help="The action to take when an empty line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - lgroup.add_argument( "--fill-short-lines", dest="fill_short_lines", - help="Fill missing trailing columns in short lines with empty values.", action='store_true') - - lgroup.add_argument( 
"--invalid-value-action", dest="invalid_value_action", - help="The action to take when a data cell value is invalid.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.REPORT) - - lgroup.add_argument( "--long-line-action", dest="long_line_action", - help="The action to take when a long line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - lgroup.add_argument( "--short-line-action", dest="short_line_action", - help="The action to take when a short line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - lgroup.add_argument( "--truncate-long-lines", dest="truncate_long_lines", - help="Remove excess trailing columns in long lines.", action='store_true') - - lgroup.add_argument( "--whitespace-line-action", dest="whitespace_line_action", - help="The action to take when a whitespace line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - return (fgroup, hgroup, lgroup) + if node_options: + lgroup.add_argument(prefix1 + "blank-id-line-action", + dest=prefix2 + "blank_id_line_action", + help="The action to take when a blank id field is detected.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + + if edge_options: + lgroup.add_argument(prefix1 + "blank-node1-line-action", + dest=prefix2 + "blank_node1_line_action", + help="The action to take when a blank node1 field is detected.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + + lgroup.add_argument(prefix1 + "blank-node2-line-action", + dest=prefix2 + "blank_node2_line_action", + help="The action to take when a blank node2 field is detected.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + lgroup.add_argument(prefix1 + "blank-required-field-line-action", + dest=prefix2 + "blank_line_action", + help="The action to take when a line with a blank node1, node2, or id field (per mode) is detected.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - # May be overridden - @classmethod - def add_arguments(cls, parser: ArgumentParser): - parser.add_argument( "--mode", dest="mode", - help="Determine the KGTK file mode.", type=KgtkReader.Mode, action=EnumNameAction, default=KgtkReader.Mode.AUTO) - - + lgroup.add_argument(prefix1 + "comment-line-action", + dest=prefix2 + "comment_line_action", + help="The action to take when a comment line is detected.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + + lgroup.add_argument(prefix1 + "empty-line-action", + dest=prefix2 + "empty_line_action", + help="The action to take when an empty line is detected.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + + lgroup.add_argument(prefix1 + "fill-short-lines", + dest=prefix2 + "fill_short_lines", + help="Fill missing trailing columns in short lines with empty values.", action='store_true') + + lgroup.add_argument(prefix1 + "invalid-value-action", + dest=prefix2 + "invalid_value_action", + help="The action to take when a data cell value is invalid.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.REPORT) + + lgroup.add_argument(prefix1 + "long-line-action", + dest=prefix2 + "long_line_action", + help="The action to take when a long line is detected.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + + lgroup.add_argument(prefix1 + 
"short-line-action", + dest=prefix2 + "short_line_action", + help="The action to take when a short line is detected.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + + lgroup.add_argument(prefix1 + "truncate-long-lines", + dest=prefix2 + "truncate_long_lines", + help="Remove excess trailing columns in long lines.", action='store_true') + + lgroup.add_argument(prefix1 + "whitespace-line-action", + dest=prefix2 + "whitespace_line_action", + help="The action to take when a whitespace line is detected.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) def main(): """ @@ -923,10 +963,7 @@ def main(): parser = ArgumentParser() KgtkReader.add_operation_arguments(parser) - (fgroup, hgroup, lgroup) = KgtkReader.add_shared_arguments(parser) - KgtkReader.add_arguments(fgroup) - EdgeReader.add_arguments(lgroup) - NodeReader.add_arguments(lgroup) + KgtkReader.add_arguments(parser, node_options=True, edge_options=True, mode_options=True) KgtkValueOptions.add_arguments(parser) parser.add_argument( "--test", dest="test_method", help="The test to perform", diff --git a/kgtk/join/nodereader.py b/kgtk/join/nodereader.py index 9fb88d260..3a804398a 100644 --- a/kgtk/join/nodereader.py +++ b/kgtk/join/nodereader.py @@ -128,21 +128,13 @@ def _skip_reserved_fields(self, column_name)->bool: return True return False - @classmethod - def add_arguments(cls, parser: ArgumentParser): - parser.add_argument( "--blank-id-line-action", dest="blank_id_line_action", - help="The action to take when a blank id field is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - def main(): """ Test the KGTK node file reader. """ parser = ArgumentParser() KgtkReader.add_operation_arguments(parser) - (fgroup, hgroup, lgroup) = KgtkReader.add_shared_arguments(parser) - NodeReader.add_arguments(lgroup) + KgtkReader.add_arguments(parser, node_options=True) KgtkValueOptions.add_arguments(parser) args = parser.parse_args() From d89e32b5fe8966aa353dd65187a15bb7cfcf889e Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Thu, 7 May 2020 19:54:12 -0700 Subject: [PATCH 119/278] Make the name more descriptive. 
--- kgtk/join/edgereader.py | 2 +- kgtk/join/kgtkreader.py | 14 +++++++------- kgtk/join/nodereader.py | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/kgtk/join/edgereader.py b/kgtk/join/edgereader.py index 93c25799b..b382c11c8 100644 --- a/kgtk/join/edgereader.py +++ b/kgtk/join/edgereader.py @@ -153,7 +153,7 @@ def main(): """ parser = ArgumentParser() KgtkReader.add_operation_arguments(parser) - KgtkReader.add_arguments(parser, edge_options=True) + KgtkReader.add_file_arguments(parser, edge_options=True) KgtkValueOptions.add_arguments(parser) args = parser.parse_args() diff --git a/kgtk/join/kgtkreader.py b/kgtk/join/kgtkreader.py index d79d20c9a..43d251ca0 100644 --- a/kgtk/join/kgtkreader.py +++ b/kgtk/join/kgtkreader.py @@ -834,12 +834,12 @@ def add_operation_arguments(cls, parser: ArgumentParser): parser.add_argument( "--very-verbose", dest="very_verbose", help="Print additional progress messages.", action='store_true') @classmethod - def add_arguments(cls, - parser: ArgumentParser, - node_options: bool = False, - edge_options: bool = False, - mode_options: bool = False, - who: str = ""): + def add_file_arguments(cls, + parser: ArgumentParser, + node_options: bool = False, + edge_options: bool = False, + mode_options: bool = False, + who: str = ""): prefix1: str = "--" if len(who) == 0 else "--" + who + "-" prefix2: str = "" if len(who) == 0 else who + "_" prefix3: str = "" if len(who) == 0 else who + " " @@ -963,7 +963,7 @@ def main(): parser = ArgumentParser() KgtkReader.add_operation_arguments(parser) - KgtkReader.add_arguments(parser, node_options=True, edge_options=True, mode_options=True) + KgtkReader.add_file_arguments(parser, node_options=True, edge_options=True, mode_options=True) KgtkValueOptions.add_arguments(parser) parser.add_argument( "--test", dest="test_method", help="The test to perform", diff --git a/kgtk/join/nodereader.py b/kgtk/join/nodereader.py index 3a804398a..7291089ee 100644 --- a/kgtk/join/nodereader.py +++ b/kgtk/join/nodereader.py @@ -134,7 +134,7 @@ def main(): """ parser = ArgumentParser() KgtkReader.add_operation_arguments(parser) - KgtkReader.add_arguments(parser, node_options=True) + KgtkReader.add_file_arguments(parser, node_options=True) KgtkValueOptions.add_arguments(parser) args = parser.parse_args() From 7b2f7b47c85cd0a750a80c897042c85ba77c15a7 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Fri, 8 May 2020 10:36:51 -0700 Subject: [PATCH 120/278] Incomplete changes. 
--- kgtk/join/ifexists.py | 106 ++++++++++++---------------------------- kgtk/join/kgtkreader.py | 6 ++- 2 files changed, 36 insertions(+), 76 deletions(-) diff --git a/kgtk/join/ifexists.py b/kgtk/join/ifexists.py index 0cab4735b..00ebc9ce5 100644 --- a/kgtk/join/ifexists.py +++ b/kgtk/join/ifexists.py @@ -16,7 +16,7 @@ """ -from argparse import ArgumentParser +from argparse import ArgumentParser, Namespace import attr import gzip from pathlib import Path @@ -33,39 +33,25 @@ @attr.s(slots=True, frozen=True) class IfExists(KgtkFormat): - left_file_path: Path = attr.ib(validator=attr.validators.instance_of(Path)) - right_file_path: Path = attr.ib(validator=attr.validators.instance_of(Path)) - output_path: typing.Optional[Path] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(Path))) + input_reader_args: typing.Mapping[str, typing.Any] = attr.ib() + input_keys: typing.Optional[typing.List[str]] = attr.ib(validator=attr.validators.optional(attr.validators.deep_iterable(member_validator=attr.validators.instance_of(str), + iterable_validator=attr.validators.instance_of(list)))) - invert: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) + filter_reader_args: typing.Mapping[str, typing.Any] = attr.ib() + filter_keys: typing.Optional[typing.List[str]] = attr.ib(validator=attr.validators.optional(attr.validators.deep_iterable(member_validator=attr.validators.instance_of(str), + iterable_validator=attr.validators.instance_of(list)))) - left_keys: typing.Optional[typing.List[str]] = attr.ib(validator=attr.validators.optional(attr.validators.deep_iterable(member_validator=attr.validators.instance_of(str), - iterable_validator=attr.validators.instance_of(list))), - default=None) - right_keys: typing.Optional[typing.List[str]] = attr.ib(validator=attr.validators.optional(attr.validators.deep_iterable(member_validator=attr.validators.instance_of(str), - iterable_validator=attr.validators.instance_of(list))), - default=None) + output_file_path: typing.Optional[Path] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(Path))) # The field separator used in multifield joins. The KGHT list character should be safe. field_separator: str = attr.ib(validator=attr.validators.instance_of(str), default=KgtkFormat.LIST_SEPARATOR) - # Ignore records with too many or too few fields? - short_line_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.EXCLUDE) - long_line_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.EXCLUDE) - - # Require or fill trailing fields? 
-    fill_short_lines: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False)
-    truncate_long_lines: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False)
+    invert: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False)

-    invalid_value_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.PASS)
     # TODO: find a working validator
     # value_options: typing.Optional[KgtkValueOptions] = attr.ib(attr.validators.optional(attr.validators.instance_of(KgtkValueOptions)), default=None)
     value_options: typing.Optional[KgtkValueOptions] = attr.ib(default=None)

-    gzip_in_parallel: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False)
-
-    error_limit: int = attr.ib(validator=attr.validators.instance_of(int), default=KgtkReader.ERROR_LIMIT_DEFAULT)
-
     verbose: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False)
     very_verbose: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False)
@@ -135,8 +121,8 @@ def extract_key_set(self, kr: KgtkReader, who: str, key_columns: typing.List[int]
     def process(self):
         # Open the input files once.
         if self.verbose:
-            print("Opening the left input file: %s" % self.left_file_path, flush=True)
-        left_kr: KgtkReader = KgtkReader.open(self.left_file_path,
+            print("Opening the input file: %s" % self.left_file_path, flush=True)
+        left_kr: KgtkReader = KgtkReader.open(self.left_file_path,
                                               short_line_action=self.short_line_action,
                                               long_line_action=self.long_line_action,
                                               fill_short_lines=self.fill_short_lines,
                                               truncate_long_lines=self.truncate_long_lines,
@@ -215,71 +201,41 @@ def main():
     """
     Test the KGTK file joiner.
     """
-    parser = ArgumentParser()
-
-    parser.add_argument(dest="left_kgtk_file", help="The left KGTK file to join", type=Path)
-
-    parser.add_argument(dest="right_kgtk_file", help="The right KGTK file to join", type=Path)
-
-    parser.add_argument( "--error-limit", dest="error_limit",
-                         help="The maximum number of errors to report before failing", type=int, default=KgtkReader.ERROR_LIMIT_DEFAULT)
+    parser: ArgumentParser = ArgumentParser()
+    KgtkReader.add_operation_arguments(parser)
+
+    parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to write", type=Path, default=None)
+
+    parser.add_argument( "--field-separator", dest="field_separator",
+                         help="Separator for multifield keys", default=IfExists.FIELD_SEPARATOR_DEFAULT)
-
-    parser.add_argument( "--fill-short-lines", dest="fill_short_lines",
-                         help="Fill missing trailing columns in short lines with empty values.", action='store_true')
-
-    parser.add_argument( "--gzip-in-parallel", dest="gzip_in_parallel", help="Execute gzip in parallel.", action='store_true')
-
-    parser.add_argument( "--invalid-value-action", dest="invalid_value_action",
-                         help="The action to take when an invalid data value is detected.",
-                         type=ValidationAction, action=EnumNameAction, default=ValidationAction.PASS)
-
+    parser.add_argument( "--invert", dest="invert", help="Invert the test (if not exists).", action='store_true')

-    parser.add_argument( "--left-keys", dest="left_keys", help="The key columns in the left file.", nargs='*')
-
-    parser.add_argument( "--long-line-action", dest="long_line_action",
-                         help="The action to take when a long line is detected.",
-                         type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE)
+    parser.add_argument( "--input-keys", dest="_input_keys", help="The key columns in the input file.", nargs='*')
+    parser.add_argument( "--filter-keys", 
dest="_filter_keys", help="The key columns in the filter file.", nargs='*') - parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to read", type=Path, default=None) - - parser.add_argument( "--right-keys", dest="right_keys", help="The key columns in the right file.", nargs='*') + KgtkReader.add_file_arguments(parser, mode_options=True, who="input") - parser.add_argument( "--short-line-action", dest="short_line_action", - help="The action to take whe a short line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - parser.add_argument( "--truncate-long-lines", dest="truncate_long_lines", - help="Remove excess trailing columns in long lines.", action='store_true') - - parser.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true') - - parser.add_argument( "--very-verbose", dest="very_verbose", help="Print additional progress messages.", action='store_true') + # TODO: Find a way to use "--filter-on" + KgtkReader.add_file_arguments(parser, mode_options=True, who="filter", optional_file=True) KgtkValueOptions.add_arguments(parser) - args = parser.parse_args() + args: Namespace = parser.parse_args() + + input_args: typing.Mapping[str, typing.Any] = dict(((item[0][len("input_"):], item[1]) for item in vars(args) if item[0].startswith("input_"))) + filter_args: typing.Mapping[str, typing.Any] = dict(((item[0][len("filter_"):], item[1]) for item in vars(args) if item[0].startswith("filter_"))) # Build the value parsing option structure. value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) - ie: IfExists = IfExists(left_file_path=args.left_kgtk_file, - right_file_path=args.right_kgtk_file, - output_path=args.output_file_path, - invert=args.invert, - left_keys=args.left_keys, - right_keys=args.right_keys, + ie: IfExists = IfExists(input_reader_args=input_args, + input_keys=args._input_keys, + filter_reader_args=filter_args, + filter_keys=args._filter_keys, + output_file_path=args.output_file_path, field_separator=args.field_separator, - short_line_action=args.short_line_action, - long_line_action=args.long_line_action, - fill_short_lines=args.fill_short_lines, - truncate_long_lines=args.truncate_long_lines, - invalid_value_action=args.invalid_valid_action, + invert=args.invert, value_options=value_options, - gzip_in_parallel=args.gzip_in_parallel, - error_limit=args.error_limit, verbose=args.verbose, very_verbose=args.very_verbose) diff --git a/kgtk/join/kgtkreader.py b/kgtk/join/kgtkreader.py index 43d251ca0..2375c3fcc 100644 --- a/kgtk/join/kgtkreader.py +++ b/kgtk/join/kgtkreader.py @@ -839,12 +839,16 @@ def add_file_arguments(cls, node_options: bool = False, edge_options: bool = False, mode_options: bool = False, + optional_file: bool = True, who: str = ""): prefix1: str = "--" if len(who) == 0 else "--" + who + "-" prefix2: str = "" if len(who) == 0 else who + "_" prefix3: str = "" if len(who) == 0 else who + " " - parser.add_argument(dest=prefix2 + "kgtk_file", help="The KGTK file to read", type=Path, nargs="?") + if optional_file: + parser.add_argument(dest=prefix2 + "kgtk_file", help="The " + who + " KGTK file to read", type=Path, nargs="?") + else: + parser.add_argument(dest=prefix2 + "kgtk_file", help="The " + who + " KGTK file to read", type=Path) fgroup: _ArgumentGroup = parser.add_argument_group(prefix3 + "File options", "Options affecting " + prefix3 + "processing") From aef6af01950a47c72c575e426a021403534d2aa9 Mon Sep 17 00:00:00 2001 
From: Craig Milo Rogers Date: Fri, 8 May 2020 10:37:24 -0700 Subject: [PATCH 121/278] Reorganize new source files. --- kgtk/io/__init__.py | 0 kgtk/{join => io}/edgereader.py | 0 kgtk/{join => io}/kgtkbase.py | 0 kgtk/{join => io}/kgtkreader.py | 0 kgtk/{join => io}/kgtkwriter.py | 0 kgtk/{join => io}/nodereader.py | 0 kgtk/{join => }/kgtkformat.py | 0 kgtk/utils/__init__.py | 0 kgtk/{join => utils}/closableiter.py | 0 kgtk/{join => utils}/enumnameaction.py | 0 kgtk/{join => utils}/gzipprocess.py | 0 kgtk/{join => utils}/validationaction.py | 0 kgtk/value/__init__.py | 0 kgtk/{join => value}/kgtkvalue.py | 0 kgtk/{join => value}/kgtkvalueoptions.py | 0 kgtk/{join => value}/languagevalidator.py | 0 16 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 kgtk/io/__init__.py rename kgtk/{join => io}/edgereader.py (100%) rename kgtk/{join => io}/kgtkbase.py (100%) rename kgtk/{join => io}/kgtkreader.py (100%) rename kgtk/{join => io}/kgtkwriter.py (100%) rename kgtk/{join => io}/nodereader.py (100%) rename kgtk/{join => }/kgtkformat.py (100%) create mode 100644 kgtk/utils/__init__.py rename kgtk/{join => utils}/closableiter.py (100%) rename kgtk/{join => utils}/enumnameaction.py (100%) rename kgtk/{join => utils}/gzipprocess.py (100%) rename kgtk/{join => utils}/validationaction.py (100%) create mode 100644 kgtk/value/__init__.py rename kgtk/{join => value}/kgtkvalue.py (100%) rename kgtk/{join => value}/kgtkvalueoptions.py (100%) rename kgtk/{join => value}/languagevalidator.py (100%) diff --git a/kgtk/io/__init__.py b/kgtk/io/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/kgtk/join/edgereader.py b/kgtk/io/edgereader.py similarity index 100% rename from kgtk/join/edgereader.py rename to kgtk/io/edgereader.py diff --git a/kgtk/join/kgtkbase.py b/kgtk/io/kgtkbase.py similarity index 100% rename from kgtk/join/kgtkbase.py rename to kgtk/io/kgtkbase.py diff --git a/kgtk/join/kgtkreader.py b/kgtk/io/kgtkreader.py similarity index 100% rename from kgtk/join/kgtkreader.py rename to kgtk/io/kgtkreader.py diff --git a/kgtk/join/kgtkwriter.py b/kgtk/io/kgtkwriter.py similarity index 100% rename from kgtk/join/kgtkwriter.py rename to kgtk/io/kgtkwriter.py diff --git a/kgtk/join/nodereader.py b/kgtk/io/nodereader.py similarity index 100% rename from kgtk/join/nodereader.py rename to kgtk/io/nodereader.py diff --git a/kgtk/join/kgtkformat.py b/kgtk/kgtkformat.py similarity index 100% rename from kgtk/join/kgtkformat.py rename to kgtk/kgtkformat.py diff --git a/kgtk/utils/__init__.py b/kgtk/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/kgtk/join/closableiter.py b/kgtk/utils/closableiter.py similarity index 100% rename from kgtk/join/closableiter.py rename to kgtk/utils/closableiter.py diff --git a/kgtk/join/enumnameaction.py b/kgtk/utils/enumnameaction.py similarity index 100% rename from kgtk/join/enumnameaction.py rename to kgtk/utils/enumnameaction.py diff --git a/kgtk/join/gzipprocess.py b/kgtk/utils/gzipprocess.py similarity index 100% rename from kgtk/join/gzipprocess.py rename to kgtk/utils/gzipprocess.py diff --git a/kgtk/join/validationaction.py b/kgtk/utils/validationaction.py similarity index 100% rename from kgtk/join/validationaction.py rename to kgtk/utils/validationaction.py diff --git a/kgtk/value/__init__.py b/kgtk/value/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/kgtk/join/kgtkvalue.py b/kgtk/value/kgtkvalue.py similarity index 100% rename from kgtk/join/kgtkvalue.py rename to 
kgtk/value/kgtkvalue.py diff --git a/kgtk/join/kgtkvalueoptions.py b/kgtk/value/kgtkvalueoptions.py similarity index 100% rename from kgtk/join/kgtkvalueoptions.py rename to kgtk/value/kgtkvalueoptions.py diff --git a/kgtk/join/languagevalidator.py b/kgtk/value/languagevalidator.py similarity index 100% rename from kgtk/join/languagevalidator.py rename to kgtk/value/languagevalidator.py From 60b5051b1d39b3d386e3bae1a3ba5806ea683d58 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Fri, 8 May 2020 10:37:48 -0700 Subject: [PATCH 122/278] Reorganize new source files. --- kgtk/cli/clean_data.py | 10 +++++----- kgtk/cli/ifexists.py | 12 ++++++------ kgtk/cli/ifnotexists.py | 12 ++++++------ kgtk/cli/validate.py | 10 +++++----- kgtk/io/edgereader.py | 10 +++++----- kgtk/io/kgtkbase.py | 6 +++--- kgtk/io/kgtkreader.py | 24 ++++++++++++------------ kgtk/io/kgtkwriter.py | 12 ++++++------ kgtk/io/nodereader.py | 10 +++++----- kgtk/join/ifexists.py | 12 ++++++------ kgtk/join/kgtkjoiner.py | 12 ++++++------ kgtk/utils/gzipprocess.py | 2 +- kgtk/value/kgtkvalue.py | 6 +++--- kgtk/value/languagevalidator.py | 2 +- 14 files changed, 70 insertions(+), 70 deletions(-) diff --git a/kgtk/cli/clean_data.py b/kgtk/cli/clean_data.py index 5fdb9dcd4..bb4059d6b 100644 --- a/kgtk/cli/clean_data.py +++ b/kgtk/cli/clean_data.py @@ -8,11 +8,11 @@ import sys import typing -from kgtk.join.enumnameaction import EnumNameAction -from kgtk.join.kgtkformat import KgtkFormat -from kgtk.join.kgtkreader import KgtkReader -from kgtk.join.kgtkwriter import KgtkWriter -from kgtk.join.validationaction import ValidationAction +from kgtk.kgtkformat import KgtkFormat +from kgtk.io.kgtkreader import KgtkReader +from kgtk.io.kgtkwriter import KgtkWriter +from kgtk.utils.enumnameaction import EnumNameAction +from kgtk.utils.validationaction import ValidationAction def parser(): return { diff --git a/kgtk/cli/ifexists.py b/kgtk/cli/ifexists.py index a1e288b25..d72f320a6 100644 --- a/kgtk/cli/ifexists.py +++ b/kgtk/cli/ifexists.py @@ -6,13 +6,13 @@ import sys import typing -from kgtk.join.enumnameaction import EnumNameAction -from kgtk.join.kgtkformat import KgtkFormat +from kgtk.kgtkformat import KgtkFormat +from kgtk.io.kgtkreader import KgtkReader +from kgtk.io.kgtkwriter import KgtkWriter from kgtk.join.ifexists import IfExists -from kgtk.join.kgtkreader import KgtkReader -from kgtk.join.kgtkwriter import KgtkWriter -from kgtk.join.kgtkvalueoptions import KgtkValueOptions -from kgtk.join.validationaction import ValidationAction +from kgtk.utils.enumnameaction import EnumNameAction +from kgtk.utils.validationaction import ValidationAction +from kgtk.value.kgtkvalueoptions import KgtkValueOptions def parser(): return { diff --git a/kgtk/cli/ifnotexists.py b/kgtk/cli/ifnotexists.py index e9003a9ef..69bbbff61 100644 --- a/kgtk/cli/ifnotexists.py +++ b/kgtk/cli/ifnotexists.py @@ -7,13 +7,13 @@ import sys import typing -from kgtk.join.enumnameaction import EnumNameAction -from kgtk.join.kgtkformat import KgtkFormat +from kgtk.kgtkformat import KgtkFormat +from kgtk.io.kgtkreader import KgtkReader +from kgtk.io.kgtkwriter import KgtkWriter from kgtk.join.ifexists import IfExists -from kgtk.join.kgtkreader import KgtkReader -from kgtk.join.kgtkwriter import KgtkWriter -from kgtk.join.kgtkvalueoptions import KgtkValueOptions -from kgtk.join.validationaction import ValidationAction +from kgtk.utils.enumnameaction import EnumNameAction +from kgtk.utils.validationaction import ValidationAction +from kgtk.value.kgtkvalueoptions import 
KgtkValueOptions def parser(): return { diff --git a/kgtk/cli/validate.py b/kgtk/cli/validate.py index ce2fce9e2..a674baac0 100644 --- a/kgtk/cli/validate.py +++ b/kgtk/cli/validate.py @@ -15,11 +15,11 @@ import sys import typing -from kgtk.join.enumnameaction import EnumNameAction -from kgtk.join.kgtkformat import KgtkFormat -from kgtk.join.kgtkreader import KgtkReader -from kgtk.join.kgtkvalueoptions import KgtkValueOptions -from kgtk.join.validationaction import ValidationAction +from kgtk.kgtkformat import KgtkFormat +from kgtk.io.kgtkreader import KgtkReader +from kgtk.utils.enumnameaction import EnumNameAction +from kgtk.utils.validationaction import ValidationAction +from kgtk.value.kgtkvalueoptions import KgtkValueOptions def parser(): return { diff --git a/kgtk/io/edgereader.py b/kgtk/io/edgereader.py index 3fe378fa0..92aa60d17 100644 --- a/kgtk/io/edgereader.py +++ b/kgtk/io/edgereader.py @@ -10,11 +10,11 @@ import sys import typing -from kgtk.join.closableiter import ClosableIter -from kgtk.join.enumnameaction import EnumNameAction -from kgtk.join.kgtkreader import KgtkReader -from kgtk.join.kgtkvalueoptions import KgtkValueOptions -from kgtk.join.validationaction import ValidationAction +from kgtk.io.kgtkreader import KgtkReader +from kgtk.utils.closableiter import ClosableIter +from kgtk.utils.enumnameaction import EnumNameAction +from kgtk.utils.validationaction import ValidationAction +from kgtk.value.kgtkvalueoptions import KgtkValueOptions @attr.s(slots=True, frozen=False) class EdgeReader(KgtkReader): diff --git a/kgtk/io/kgtkbase.py b/kgtk/io/kgtkbase.py index 2299725d2..7cbcef44c 100644 --- a/kgtk/io/kgtkbase.py +++ b/kgtk/io/kgtkbase.py @@ -7,9 +7,9 @@ import sys import typing -from kgtk.join.validationaction import ValidationAction -from kgtk.join.kgtkformat import KgtkFormat -from kgtk.join.kgtkvalue import KgtkValue +from kgtk.kgtkformat import KgtkFormat +from kgtk.utils.validationaction import ValidationAction +from kgtk.value.kgtkvalue import KgtkValue class KgtkBase(KgtkFormat): @classmethod diff --git a/kgtk/io/kgtkreader.py b/kgtk/io/kgtkreader.py index b694e9e8e..707fbdb01 100644 --- a/kgtk/io/kgtkreader.py +++ b/kgtk/io/kgtkreader.py @@ -28,14 +28,14 @@ import sys import typing -from kgtk.join.closableiter import ClosableIter, ClosableIterTextIOWrapper -from kgtk.join.enumnameaction import EnumNameAction -from kgtk.join.gzipprocess import GunzipProcess -from kgtk.join.kgtkbase import KgtkBase -from kgtk.join.kgtkformat import KgtkFormat -from kgtk.join.kgtkvalue import KgtkValue -from kgtk.join.kgtkvalueoptions import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS -from kgtk.join.validationaction import ValidationAction +from kgtk.kgtkformat import KgtkFormat +from kgtk.io.kgtkbase import KgtkBase +from kgtk.utils.closableiter import ClosableIter, ClosableIterTextIOWrapper +from kgtk.utils.enumnameaction import EnumNameAction +from kgtk.utils.gzipprocess import GunzipProcess +from kgtk.utils.validationaction import ValidationAction +from kgtk.value.kgtkvalue import KgtkValue +from kgtk.value.kgtkvalueoptions import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS @attr.s(slots=True, frozen=False) class KgtkReader(KgtkBase, ClosableIter[typing.List[str]]): @@ -216,7 +216,7 @@ def open(cls, if is_edge_file: # We'll instantiate an EdgeReader, which is a subclass of KgtkReader. # The EdgeReader import is deferred to avoid circular imports. 
- from kgtk.join.edgereader import EdgeReader + from kgtk.io.edgereader import EdgeReader # Get the indices of the required columns. node1_column_idx: int @@ -276,7 +276,7 @@ def open(cls, elif is_node_file: # We'll instantiate an NodeReader, which is a subclass of KgtkReader. # The NodeReader import is deferred to avoid circular imports. - from kgtk.join.nodereader import NodeReader + from kgtk.io.nodereader import NodeReader # Get the index of the required column: id_column_idx: int = cls.required_node_column(column_name_map, @@ -917,9 +917,9 @@ def main(): Test the KGTK file reader. """ # The EdgeReader import is deferred to avoid circular imports. - from kgtk.join.edgereader import EdgeReader + from kgtk.io.edgereader import EdgeReader # The NodeReader import is deferred to avoid circular imports. - from kgtk.join.nodereader import NodeReader + from kgtk.io.nodereader import NodeReader parser = ArgumentParser() KgtkReader.add_operation_arguments(parser) diff --git a/kgtk/io/kgtkwriter.py b/kgtk/io/kgtkwriter.py index 367b6f88f..d3d256c07 100644 --- a/kgtk/io/kgtkwriter.py +++ b/kgtk/io/kgtkwriter.py @@ -15,12 +15,12 @@ import sys import typing -from kgtk.join.kgtkreader import KgtkReader -from kgtk.join.enumnameaction import EnumNameAction -from kgtk.join.gzipprocess import GzipProcess -from kgtk.join.kgtkbase import KgtkBase -from kgtk.join.kgtkformat import KgtkFormat -from kgtk.join.validationaction import ValidationAction +from kgtk.kgtkformat import KgtkFormat +from kgtk.io.kgtkbase import KgtkBase +from kgtk.io.kgtkreader import KgtkReader +from kgtk.utils.enumnameaction import EnumNameAction +from kgtk.utils.gzipprocess import GzipProcess +from kgtk.utils.validationaction import ValidationAction @attr.s(slots=True, frozen=False) class KgtkWriter(KgtkBase): diff --git a/kgtk/io/nodereader.py b/kgtk/io/nodereader.py index 9fb88d260..9d7df148f 100644 --- a/kgtk/io/nodereader.py +++ b/kgtk/io/nodereader.py @@ -10,11 +10,11 @@ import sys import typing -from kgtk.join.closableiter import ClosableIter -from kgtk.join.enumnameaction import EnumNameAction -from kgtk.join.kgtkreader import KgtkReader -from kgtk.join.kgtkvalueoptions import KgtkValueOptions -from kgtk.join.validationaction import ValidationAction +from kgtk.io.kgtkreader import KgtkReader +from kgtk.utils.closableiter import ClosableIter +from kgtk.utils.enumnameaction import EnumNameAction +from kgtk.utils.validationaction import ValidationAction +from kgtk.value.kgtkvalueoptions import KgtkValueOptions @attr.s(slots=True, frozen=False) class NodeReader(KgtkReader): diff --git a/kgtk/join/ifexists.py b/kgtk/join/ifexists.py index 0cab4735b..bed0d6408 100644 --- a/kgtk/join/ifexists.py +++ b/kgtk/join/ifexists.py @@ -24,12 +24,12 @@ import sys import typing -from kgtk.join.enumnameaction import EnumNameAction -from kgtk.join.kgtkformat import KgtkFormat -from kgtk.join.kgtkreader import KgtkReader -from kgtk.join.kgtkwriter import KgtkWriter -from kgtk.join.kgtkvalueoptions import KgtkValueOptions -from kgtk.join.validationaction import ValidationAction +from kgtk.kgtkformat import KgtkFormat +from kgtk.io.kgtkreader import KgtkReader +from kgtk.io.kgtkwriter import KgtkWriter +from kgtk.utils.enumnameaction import EnumNameAction +from kgtk.utils.validationaction import ValidationAction +from kgtk.value.kgtkvalueoptions import KgtkValueOptions @attr.s(slots=True, frozen=True) class IfExists(KgtkFormat): diff --git a/kgtk/join/kgtkjoiner.py b/kgtk/join/kgtkjoiner.py index d512d7a0b..4855ac774 100644 --- 
a/kgtk/join/kgtkjoiner.py +++ b/kgtk/join/kgtkjoiner.py @@ -14,12 +14,12 @@ import sys import typing -from kgtk.join.enumnameaction import EnumNameAction -from kgtk.join.kgtkreader import KgtkReader -from kgtk.join.kgtkformat import KgtkFormat -from kgtk.join.kgtkwriter import KgtkWriter -from kgtk.join.kgtkvalueoptions import KgtkValueOptions -from kgtk.join.validationaction import ValidationAction +from kgtk.kgtkformat import KgtkFormat +from kgtk.io.kgtkreader import KgtkReader +from kgtk.io.kgtkwriter import KgtkWriter +from kgtk.utils.enumnameaction import EnumNameAction +from kgtk.utils.validationaction import ValidationAction +from kgtk.value.kgtkvalueoptions import KgtkValueOptions @attr.s(slots=True, frozen=True) class KgtkJoiner(KgtkFormat): diff --git a/kgtk/utils/gzipprocess.py b/kgtk/utils/gzipprocess.py index acacacb83..b63e86719 100644 --- a/kgtk/utils/gzipprocess.py +++ b/kgtk/utils/gzipprocess.py @@ -5,7 +5,7 @@ from multiprocessing import Process, Queue import typing -from kgtk.join.closableiter import ClosableIter +from kgtk.utils.closableiter import ClosableIter # This helper class supports running gzip in parallel. # diff --git a/kgtk/value/kgtkvalue.py b/kgtk/value/kgtkvalue.py index 9bec2605e..ea65b66bd 100644 --- a/kgtk/value/kgtkvalue.py +++ b/kgtk/value/kgtkvalue.py @@ -8,9 +8,9 @@ import sys import typing -from kgtk.join.kgtkformat import KgtkFormat -from kgtk.join.kgtkvalueoptions import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS -from kgtk.join.languagevalidator import LanguageValidator +from kgtk.kgtkformat import KgtkFormat +from kgtk.value.kgtkvalueoptions import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS +from kgtk.value.languagevalidator import LanguageValidator @attr.s(slots=True, frozen=False) class KgtkValue(KgtkFormat): diff --git a/kgtk/value/languagevalidator.py b/kgtk/value/languagevalidator.py index 4c7c9ff2e..f9606f221 100644 --- a/kgtk/value/languagevalidator.py +++ b/kgtk/value/languagevalidator.py @@ -8,7 +8,7 @@ import pycountry # type: ignore import typing -from kgtk.join.kgtkvalueoptions import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS +from kgtk.value.kgtkvalueoptions import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS # Problem: pycountry incorporates the Debian team's ISO 639-3 table, # which as of 03-May-2020 has not been updated in four years! From 5b1fab022c329b93bc880373426582a67feccf3a Mon Sep 17 00:00:00 2001 From: rongpenl <45610532+rongpenl@users.noreply.github.com> Date: Fri, 8 May 2020 10:55:03 -0700 Subject: [PATCH 123/278] Deleting the hard-coded test --- kgtk/cli/generate_wikidata_triples.py | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/kgtk/cli/generate_wikidata_triples.py b/kgtk/cli/generate_wikidata_triples.py index cc21f5c2a..474ee51d5 100644 --- a/kgtk/cli/generate_wikidata_triples.py +++ b/kgtk/cli/generate_wikidata_triples.py @@ -161,30 +161,3 @@ def run( else: generator.entry_point(line_num+1,edge) generator.finalize() - -# testing profiling locally with direct call -# pip3 install snakeviz -# run `snakeviz /tmp/tmp.dat` to visualize the call stacks. 
-# python3 -m cProfile -o /tmp/tmp.dat generate_wikidata_triples.py -if __name__ == "__main__": - import gzip - from kgtk.triple_generator import TripleGenerator - import sys - with open("/tmp/gwt.log","w") as dest_fp: - generator = TripleGenerator( - prop_file="/Users/rongpeng/Documents/ISI/Covid19/covid_data/v1.3/heng_props.tsv", - label_set="label", - alias_set="aliases", - description_set="descriptions", - n=10000, - ignore=True, - truthy=True, - dest_fp = dest_fp - ) - with open("/Users/rongpeng/Documents/ISI/Covid19/covid_data/v1.3/kgtk_sample_sorted.tsv","r") as fp: - for num, edge in enumerate(fp.readlines()): - if edge.startswith("#") or num == 0: - continue - else: - generator.entry_point(num+1,edge) - generator.finalize() \ No newline at end of file From f74029d9029b86f43e709a828788c96587be1d10 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Fri, 8 May 2020 10:55:36 -0700 Subject: [PATCH 124/278] Support prefixed KgtkValueOptions initialization. --- kgtk/value/kgtkvalueoptions.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/kgtk/value/kgtkvalueoptions.py b/kgtk/value/kgtkvalueoptions.py index 767f2d199..be957d808 100644 --- a/kgtk/value/kgtkvalueoptions.py +++ b/kgtk/value/kgtkvalueoptions.py @@ -113,16 +113,21 @@ def add_arguments(cls, parser: ArgumentParser): @classmethod # Build the value parsing option structure. - def from_args(cls, args: Namespace)->'KgtkValueOptions': - return cls(allow_month_or_day_zero=args.allow_month_or_day_zero, - repair_month_or_day_zero=args.repair_month_or_day_zero, - allow_language_suffixes=args.allow_language_suffixes, - allow_lax_strings=args.allow_lax_strings, - allow_lax_lq_strings=args.allow_lax_lq_strings, - additional_language_codes=args.additional_language_codes, - minimum_valid_year=args.minimum_valid_year, - maximum_valid_year=args.maximum_valid_year, - escape_list_separators=args.escape_list_separators) + def from_dict(cls, d: dict, prefix: str = "")->'KgtkValueOptions': + return cls(allow_month_or_day_zero=d.get(prefix + "allow_month_or_day_zero", False), + repair_month_or_day_zero=d.get(prefix + "repair_month_or_day_zero", False), + allow_language_suffixes=d.get(prefix + "allow_language_suffixes", True), + allow_lax_strings=d.get(prefix + "allow_lax_strings", False), + allow_lax_lq_strings=d.get(prefix + "allow_lax_lq_strings", False), + additional_language_codes=d.get(prefix + "additional_language_codes", None), + minimum_valid_year=d.get(prefix + "minimum_valid_year", cls.MINIMUM_VALID_YEAR), + maximum_valid_year=d.get(prefix + "maximum_valid_year", cls.MAXIMUM_VALID_YEAR), + escape_list_separators=d.get(prefix + "escape_list_separators", False)) + + @classmethod + # Build the value parsing option structure. + def from_args(cls, args: Namespace, prefix: str = "")->'KgtkValueOptions': + return cls.from_dict(vars(args), prefix=prefix) DEFAULT_KGTK_VALUE_OPTIONS: KgtkValueOptions = KgtkValueOptions() From 0db82df2c16ac34828e7326a5887d65d780104db Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Fri, 8 May 2020 13:52:18 -0700 Subject: [PATCH 125/278] Prefixed KGTK value option arguments. 
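This patch lets a command register several independent sets of KGTK value options, one per input file, without duplicating every option declaration. Here is a minimal sketch of the intended usage (illustrative code, not part of the patch; it mirrors the self-test added to main() below):

    from argparse import ArgumentParser, Namespace
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    parser: ArgumentParser = ArgumentParser()
    # Unprefixed options: --allow-lax-strings -> args.allow_lax_strings
    KgtkValueOptions.add_arguments(parser)
    # Prefixed options: --left-allow-lax-strings -> args.left_allow_lax_strings
    KgtkValueOptions.add_arguments(parser, who="left", desc=" for the left file.")

    args: Namespace = parser.parse_args(["--left-allow-lax-strings"])
    # Two independent option structures built from the same Namespace:
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)
    left_options: KgtkValueOptions = KgtkValueOptions.from_args(args, who="left")

In this sketch left_options.allow_lax_strings is True while value_options.allow_lax_strings keeps its default of False. Keeping the prefix logic inside add_arguments and from_dict means callers pass only a "who" string instead of redeclaring every option.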
--- kgtk/value/kgtkvalueoptions.py | 100 +++++++++++++++++++++------------ 1 file changed, 65 insertions(+), 35 deletions(-) diff --git a/kgtk/value/kgtkvalueoptions.py b/kgtk/value/kgtkvalueoptions.py index be957d808..124fc17be 100644 --- a/kgtk/value/kgtkvalueoptions.py +++ b/kgtk/value/kgtkvalueoptions.py @@ -58,62 +58,83 @@ class KgtkValueOptions: @classmethod - def add_arguments(cls, parser: ArgumentParser): - vgroup = parser.add_argument_group("Data value parsing", "Options controlling the parsing and processing of KGTK data values.") - vgroup.add_argument( "--additional-language-codes", dest="additional_language_codes", - help="Additional language codes.", nargs="*", default=None) + def add_arguments(cls, parser: ArgumentParser, who: str = "", desc: str = "."): + """Add arguments for KgtkValue option processing. + + When "who" is not empty, it prefixes the options, destinations, and + help messages. This facilitates constructing command lines with + multiple sets of KGTKValue options, such as for different input files. + """ + prefix1: str = "--" # The command line argument prefix. + prefix2: str = "" # The destination name prefix. + prefix3: str = "" # The help message prefix. + + if len(who) > 0: + prefix1 = "--" + who + "-" + prefix2 = who + "_" + prefix3 = who + ": " + + vgroup = parser.add_argument_group(prefix3 + "Data value parsing", "Options controlling the parsing and processing of KGTK data values" + desc) + vgroup.add_argument( prefix1 + "additional-language-codes", dest=prefix2 + "additional_language_codes", + help=prefix3 + "Additional language codes.", nargs="*", default=None) lsgroup= vgroup.add_mutually_exclusive_group() - lsgroup.add_argument( "--allow-language-suffixes", dest="allow_language_suffixes", - help="Allow language identifier suffixes starting with a dash.", action='store_true', default=True) + lsgroup.add_argument( prefix1 + "allow-language-suffixes", dest=prefix2 + "allow_language_suffixes", + help=prefix3 + "Allow language identifier suffixes starting with a dash.", action='store_true', default=True) - lsgroup.add_argument( "--disallow-language-suffixes", dest="allow_language_suffixes", - help="Disallow language identifier suffixes starting with a dash.", action='store_false') + lsgroup.add_argument( prefix1 + "disallow-language-suffixes", dest=prefix2 + "allow_language_suffixes", + help=prefix3 + "Disallow language identifier suffixes starting with a dash.", action='store_false') laxgroup= vgroup.add_mutually_exclusive_group() - laxgroup.add_argument( "--allow-lax-strings", dest="allow_lax_strings", - help="Do not check if double quotes are backslashed inside strings.", action='store_true', default=False) + laxgroup.add_argument( prefix1 + "allow-lax-strings", dest=prefix2 + "allow_lax_strings", + help=prefix3 + "Do not check if double quotes are backslashed inside strings.", action='store_true', default=False) - laxgroup.add_argument( "--disallow-lax-strings", dest="allow_lax_strings", - help="Check if double quotes are backslashed inside strings.", action='store_false') + laxgroup.add_argument( prefix1 + "disallow-lax-strings", dest=prefix2 + "allow_lax_strings", + help=prefix3 + "Check if double quotes are backslashed inside strings.", action='store_false') lqgroup= vgroup.add_mutually_exclusive_group() - lqgroup.add_argument( "--allow-lax-lq-strings", dest="allow_lax_lq_strings", - help="Do not check if single quotes are backslashed inside language qualified strings.", action='store_true', default=False) + lqgroup.add_argument( prefix1 + 
"allow-lax-lq-strings", dest=prefix2 + "allow_lax_lq_strings", + help=prefix3 + "Do not check if single quotes are backslashed inside language qualified strings.", + action='store_true', default=False) - lqgroup.add_argument( "--disallow-lax-lq-strings", dest="allow_lax_lq_strings", - help="Check if single quotes are backslashed inside language qualified strings.", action='store_false') + lqgroup.add_argument( prefix1 + "disallow-lax-lq-strings", dest=prefix2 + "allow_lax_lq_strings", + help=prefix3 + "Check if single quotes are backslashed inside language qualified strings.", + action='store_false') amd0group= vgroup.add_mutually_exclusive_group() - amd0group.add_argument( "--allow-month-or-day-zero", dest="allow_month_or_day_zero", - help="Allow month or day zero in dates.", action='store_true', default=False) + amd0group.add_argument( prefix1 + "allow-month-or-day-zero", dest=prefix2 + "allow_month_or_day_zero", + help=prefix3 + "Allow month or day zero in dates.", action='store_true', default=False) - amd0group.add_argument( "--disallow-month-or-day-zero", dest="allow_month_or_day_zero", - help="Allow month or day zero in dates.", action='store_false') + amd0group.add_argument( prefix1 + "disallow-month-or-day-zero", dest=prefix2 + "allow_month_or_day_zero", + help=prefix3 + "Allow month or day zero in dates.", action='store_false') rmd0group= vgroup.add_mutually_exclusive_group() - rmd0group.add_argument( "--repair-month-or-day-zero", dest="repair_month_or_day_zero", - help="Repair month or day zero in dates.", action='store_true', default=False) + rmd0group.add_argument( prefix1 + "repair-month-or-day-zero", dest=prefix2 + "repair_month_or_day_zero", + help=prefix3 + "Repair month or day zero in dates.", action='store_true', default=False) - rmd0group.add_argument( "--no-repair-month-or-day-zero", dest="repair_month_or_day_zero", - help="Do not repair month or day zero in dates.", action='store_false') + rmd0group.add_argument( prefix1 + "no-repair-month-or-day-zero", dest=prefix2 + "repair_month_or_day_zero", + help=prefix3 + "Do not repair month or day zero in dates.", action='store_false') - vgroup.add_argument( "--minimum-valid-year", dest="minimum_valid_year", - help="The minimum valid year in dates.", type=int, default=cls.MINIMUM_VALID_YEAR) + vgroup.add_argument( prefix1 + "minimum-valid-year", dest=prefix2 + "minimum_valid_year", + help=prefix3 + "The minimum valid year in dates.", type=int, default=cls.MINIMUM_VALID_YEAR) - vgroup.add_argument( "--maximum-valid-year", dest="maximum_valid_year", - help="The maximum valid year in dates.", type=int, default=cls.MAXIMUM_VALID_YEAR) + vgroup.add_argument( prefix1 + "maximum-valid-year", dest=prefix2 + "maximum_valid_year", + help=prefix3 + "The maximum valid year in dates.", type=int, default=cls.MAXIMUM_VALID_YEAR) elsgroup= vgroup.add_mutually_exclusive_group() - elsgroup.add_argument( "--escape-list-separators", dest="escape_list_separators", - help="Escape all list separators instead of splitting on them.", action='store_true', default=False) + elsgroup.add_argument( prefix1 + "escape-list-separators", dest=prefix2 + "escape_list_separators", + help=prefix3 + "Escape all list separators instead of splitting on them.", action='store_true', default=False) - elsgroup.add_argument( "--no-escape-list-separators", dest="escape_list_separators", - help="Do not escape list separators.", action='store_false') + elsgroup.add_argument( prefix1 + "no-escape-list-separators", dest=prefix2 + "escape_list_separators", + help=prefix3 + "Do 
not escape list separators.", action='store_false') @classmethod # Build the value parsing option structure. - def from_dict(cls, d: dict, prefix: str = "")->'KgtkValueOptions': + def from_dict(cls, d: dict, who: str = "")->'KgtkValueOptions': + prefix: str = "" # The destination name prefix. + if len(who) > 0: + prefix = who + "_" + return cls(allow_month_or_day_zero=d.get(prefix + "allow_month_or_day_zero", False), repair_month_or_day_zero=d.get(prefix + "repair_month_or_day_zero", False), allow_language_suffixes=d.get(prefix + "allow_language_suffixes", True), @@ -126,8 +147,8 @@ def from_dict(cls, d: dict, prefix: str = "")->'KgtkValueOptions': @classmethod # Build the value parsing option structure. - def from_args(cls, args: Namespace, prefix: str = "")->'KgtkValueOptions': - return cls.from_dict(vars(args), prefix=prefix) + def from_args(cls, args: Namespace, who: str = "")->'KgtkValueOptions': + return cls.from_dict(vars(args), who=who) DEFAULT_KGTK_VALUE_OPTIONS: KgtkValueOptions = KgtkValueOptions() @@ -137,6 +158,8 @@ def main(): """ parser: ArgumentParser = ArgumentParser() KgtkValueOptions.add_arguments(parser) + KgtkValueOptions.add_arguments(parser, who="left", desc=" for the left file.") + KgtkValueOptions.add_arguments(parser, who="right", desc=" for the right file.") args: Namespace = parser.parse_args() # Build the value parsing option structure. @@ -150,6 +173,13 @@ def main(): print("additional_language_codes: None") else: print("additional_language_codes: [ %s ]" % ", ".join(value_options.additional_language_codes)) + + # Test prefixed value option processing. + left_value_options: KgtkValueOptions = KgtkValueOptions.from_args(args, who="left") + print("left_allow_month_or_day_zero: %s" % str(left_value_options.allow_month_or_day_zero)) + + right_value_options: KgtkValueOptions = KgtkValueOptions.from_args(args, who="right") + print("right_allow_month_or_day_zero: %s" % str(right_value_options.allow_month_or_day_zero)) if __name__ == "__main__": main() From 35ae6f98a8687b5ffe2f9d9ff23edf82d0c590a3 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Fri, 8 May 2020 13:55:59 -0700 Subject: [PATCH 126/278] Add options for minimum/maximum lat/lon override. 
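The new bounds feed the location-coordinates check, which compares parsed values against options.minimum_valid_lat and its siblings. Below is a minimal sketch of overriding them, assuming the MINIMUM_VALID_LAT, MAXIMUM_VALID_LAT, MINIMUM_VALID_LON, and MAXIMUM_VALID_LON class constants referenced by the new defaults are defined on KgtkValueOptions; note that the new arguments are declared with type=int, so overrides are whole degrees:

    from argparse import ArgumentParser, Namespace
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    parser: ArgumentParser = ArgumentParser()
    KgtkValueOptions.add_arguments(parser)

    # Loosen the latitude/longitude bounds, e.g. to accept data that encodes
    # missing coordinates with out-of-range sentinel values.
    args: Namespace = parser.parse_args(["--minimum-valid-lat", "-91",
                                         "--maximum-valid-lon", "181"])
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)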
--- kgtk/value/kgtkvalueoptions.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kgtk/value/kgtkvalueoptions.py b/kgtk/value/kgtkvalueoptions.py index 124fc17be..d37a99243 100644 --- a/kgtk/value/kgtkvalueoptions.py +++ b/kgtk/value/kgtkvalueoptions.py @@ -121,6 +121,18 @@ def add_arguments(cls, parser: ArgumentParser, who: str = "", desc: str = "."): vgroup.add_argument( prefix1 + "maximum-valid-year", dest=prefix2 + "maximum_valid_year", help=prefix3 + "The maximum valid year in dates.", type=int, default=cls.MAXIMUM_VALID_YEAR) + vgroup.add_argument( prefix1 + "minimum-valid-lat", dest=prefix2 + "minimum_valid_lat", + help=prefix3 + "The minimum valid latitude.", type=int, default=cls.MINIMUM_VALID_LAT) + + vgroup.add_argument( prefix1 + "maximum-valid-lat", dest=prefix2 + "maximum_valid_lat", + help=prefix3 + "The maximum valid latitude.", type=int, default=cls.MAXIMUM_VALID_LAT) + + vgroup.add_argument( prefix1 + "minimum-valid-lon", dest=prefix2 + "minimum_valid_lon", + help=prefix3 + "The minimum valid longitude.", type=int, default=cls.MINIMUM_VALID_LON) + + vgroup.add_argument( prefix1 + "maximum-valid-lon", dest=prefix2 + "maximum_valid_lon", + help=prefix3 + "The maximum valid longitude.", type=int, default=cls.MAXIMUM_VALID_LON) + elsgroup= vgroup.add_mutually_exclusive_group() elsgroup.add_argument( prefix1 + "escape-list-separators", dest=prefix2 + "escape_list_separators", help=prefix3 + "Escape all list separators instead of splitting on them.", action='store_true', default=False) From e595c287611e23f632f30af4811a37fe8b2a40b4 Mon Sep 17 00:00:00 2001 From: saggu Date: Fri, 8 May 2020 14:42:28 -0700 Subject: [PATCH 127/278] remove removal of very small values --- kgtk/triple_generator.py | 50 ++++++++++++++++++++-------------------- requirements.txt | 1 + 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py index 675ceffe9..1ed5b5414 100644 --- a/kgtk/triple_generator.py +++ b/kgtk/triple_generator.py @@ -21,7 +21,7 @@ ) BAD_CHARS = [":", "-", "&", ",", " ", - "(", ")", "\'", '\"', "/", "\\", "[", "]", ";","|"] + "(", ")", "\'", '\"', "/", "\\", "[", "]", ";", "|"] class TripleGenerator: @@ -30,16 +30,16 @@ class TripleGenerator: """ def __init__( - self, - prop_file: str, - label_set: str, - alias_set: str, - description_set: str, - ignore: bool, - n: int, - dest_fp: TextIO = sys.stdout, - truthy: bool = False, - use_id:bool=False, + self, + prop_file: str, + label_set: str, + alias_set: str, + description_set: str, + ignore: bool, + n: int, + dest_fp: TextIO = sys.stdout, + truthy: bool = False, + use_id: bool = False, ): from etk.wikidata.statement import Rank self.ignore = ignore @@ -200,7 +200,7 @@ def generate_prop_declaration_triple(self, node1: str, label: str, node2: str) - return True def generate_normal_triple( - self, node1: str, label: str, node2: str, is_qualifier_edge: bool,e_id:str) -> bool: + self, node1: str, label: str, node2: str, is_qualifier_edge: bool, e_id: str) -> bool: if self.use_id: e_id = TripleGenerator.replace_illegal_string(e_id) entity = self._node_2_entity(node1) @@ -244,7 +244,7 @@ def generate_normal_triple( dateTimeString.split("-00-00")) elif dateTimeString[8:10] == "00": dateTimeString = dateTimeString[:8] + \ - "01" + dateTimeString[10:] + "01" + dateTimeString[10:] object = TimeValue( value=dateTimeString, calendar=Item("Q1985727"), @@ -267,9 +267,6 @@ def generate_normal_triple( res = self.quantity_pattern.match(node2).groups() amount, lower_bound, 
upper_bound, unit = res - # Handle extra small numbers for now. TODO - if TripleGenerator.is_invalid_decimal_string(amount) or TripleGenerator.is_invalid_decimal_string(lower_bound) or TripleGenerator.is_invalid_decimal_string(upper_bound): - return False amount = TripleGenerator.clean_number_string(amount) lower_bound = TripleGenerator.clean_number_string(lower_bound) upper_bound = TripleGenerator.clean_number_string(upper_bound) @@ -285,6 +282,7 @@ def generate_normal_triple( amount, upper_bound=upper_bound, lower_bound=lower_bound) else: object = QuantityValue(amount) + elif edge_type == MonolingualText: text_string, lang = TripleGenerator.process_text_string(node2) object = MonolingualText(text_string, lang) @@ -314,10 +312,10 @@ def generate_normal_triple( self.doc.kg.add_subject(object) if self.truthy: self.to_append_statement = entity.add_truthy_statement( - label, object,statement_id=e_id) if self.use_id else entity.add_truthy_statement(label,object) + label, object, statement_id=e_id) if self.use_id else entity.add_truthy_statement(label, object) else: self.to_append_statement = entity.add_statement( - label, object,statement_id=e_id) if self.use_id else entity.add_statement(label, object) + label, object, statement_id=e_id) if self.use_id else entity.add_statement(label, object) self.doc.kg.add_subject(entity) return True @@ -370,8 +368,9 @@ def entry_point(self, line_number: int, edge: str): node2_index = edge_list.index("node2") prop_index = edge_list.index("property") id_index = edge_list.index("id") - if not all([node1_index>-1,node2_index>-1,prop_index>-1,id_index>-1]): - raise KGTKException("Header of kgtk file misses at least one of required column names: (node1, node2, property and id)") + if not all([node1_index > -1, node2_index > -1, prop_index > -1, id_index > -1]): + raise KGTKException( + "Header of kgtk file misses at least one of required column names: (node1, node2, property and id)") else: self.order_map["node1"] = node1_index self.order_map["node2"] = node2_index @@ -380,12 +379,12 @@ def entry_point(self, line_number: int, edge: str): return # use the order_map to map the node - + node1 = edge_list[self.order_map["node1"]].strip() node2 = edge_list[self.order_map["node2"]].strip() prop = edge_list[self.order_map["prop"]].strip() e_id = edge_list[self.order_map["id"]].strip() - if line_number == 2: + if line_number == 2: # by default a statement edge is_qualifier_edge = False # print("#Debug Info: ",line_number, self.to_append_statement_id, e_id, is_qualifier_edge,self.to_append_statement) @@ -428,7 +427,7 @@ def entry_point(self, line_number: int, edge: str): else: if prop in self.prop_types: success = self.generate_normal_triple( - node1, prop, node2, is_qualifier_edge,e_id) + node1, prop, node2, is_qualifier_edge, e_id) else: if not self.ignore: raise KGTKException( @@ -438,8 +437,9 @@ def entry_point(self, line_number: int, edge: str): success = False if (not success) and (not is_qualifier_edge) and (not self.ignore): # We have a corrupted edge here. 
- self.ignore_file.write("Corrupted statement at line number: {} with id {} with current corrupted id {}\n".format( - line_number, e_id, self.corrupted_statement_id)) + self.ignore_file.write( + "Corrupted statement at line number: {} with id {} with current corrupted id {}\n".format( + line_number, e_id, self.corrupted_statement_id)) self.ignore_file.flush() self.corrupted_statement_id = e_id else: diff --git a/requirements.txt b/requirements.txt index 254c51aa1..27e52a2c2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,3 +16,4 @@ attrs pycountry iso-639 redis +rfc3986 From 0be3326e1ccd51f9709c91761c073915bf15591c Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Fri, 8 May 2020 15:52:04 -0700 Subject: [PATCH 128/278] Return parsed fields in a separate object. --- kgtk/value/kgtkvalue.py | 620 +++++++++++++++++---------------- kgtk/value/kgtkvalueoptions.py | 5 +- 2 files changed, 317 insertions(+), 308 deletions(-) diff --git a/kgtk/value/kgtkvalue.py b/kgtk/value/kgtkvalue.py index ea65b66bd..e38cf575f 100644 --- a/kgtk/value/kgtkvalue.py +++ b/kgtk/value/kgtkvalue.py @@ -12,27 +12,15 @@ from kgtk.value.kgtkvalueoptions import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS from kgtk.value.languagevalidator import LanguageValidator -@attr.s(slots=True, frozen=False) -class KgtkValue(KgtkFormat): - value: str = attr.ib(validator=attr.validators.instance_of(str)) - options: KgtkValueOptions = attr.ib(validator=attr.validators.instance_of(KgtkValueOptions), default=DEFAULT_KGTK_VALUE_OPTIONS) - - # TODO: proper validation. - parent: typing.Optional['KgtkValue'] = attr.ib(default=None) - # Cache some properties of the value that would be expensive to - # continuously recompute. - data_type: typing.Optional[KgtkFormat.DataType] = None - valid: typing.Optional[bool] = None - - # If this is a list, cache a KgtkValue object for each item of the list. - # - # Note: Please do not access this list directly. Use get_list_items(). - list_items: typing.Optional[typing.List['KgtkValue']] = None +@attr.s(slots=True, frozen=False) +class KgtkValueFields(): + data_type: KgtkFormat.DataType = attr.ib(validator=attr.validators.instance_of(KgtkFormat.DataType)) + valid: bool = attr.ib(validator=attr.validators.instance_of(bool)) # The following members offer access to the components (fields) of a # KgtkValue. They are accessible immediately after validating the - # contents of the KgtkValue object: + # contents of the KgtkValue object when kgtk_value.parse_fields is True. # # obj.is_valid() return True # obj.validate() returns True @@ -43,44 +31,157 @@ class KgtkValue(KgtkFormat): # The fields may be accessed directly from this object or they may be # obtained as a map via obj.get_fields() + # >0 if this is a list. + list_len: int = attr.ib(validator=attr.validators.instance_of(int), default=0) + # Offer the components of a string or language-qualified string, after validating the item. - contents: typing.Optional[str] = None # String contents without the enclosing quotes - lang: typing.Optional[str] = None # 2- or 3-character code without suffix. - suffix: typing.Optional[str] = None # Language code suffix, including the leading dash. + # String contents without the enclosing quotes + contents: typing.Optional[str] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(str)), default=None) + + # 2- or 3-character language code without suffix.
+ lang: typing.Optional[str] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(str)), default=None) + + # The language code suffix, including the leading dash. + suffix: typing.Optional[str] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(str)), default=None) # Offer the components of a number or quantity, after validating the item. - numberstr: typing.Optional[str] = None # Note: not converted to int or float - number: typing.Optional[typing.Union[int, float]] = None - low_tolerancestr: typing.Optional[str] = None # Note: not converted to int or float - high_tolerancestr: typing.Optional[str] = None # Note: not converted to int or float - si_units: typing.Optional[str] = None - wikidata_node: typing.Optional[str] = None + numberstr: typing.Optional[str] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(str)), default=None) + number: typing.Optional[typing.Union[int, float]] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of((int, float))), default=None) + + low_tolerancestr: typing.Optional[str] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(str)), default=None) + + high_tolerancestr: typing.Optional[str] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(str)), default=None) + + si_units: typing.Optional[str] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(str)), default=None) + + wikidata_node: typing.Optional[str] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(str)), default=None) # Offer the components of a location coordinates, after validaating the item: - latstr: typing.Optional[str] = None - lat: typing.Optional[float] = None - lonstr: typing.Optional[str] = None - lon: typing.Optional[float] = None + latstr: typing.Optional[str] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(str)), default=None) + lat: typing.Optional[float] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(float)), default=None) + + lonstr: typing.Optional[str] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(str)), default=None) + lon: typing.Optional[float] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(float)), default=None) # Offer the components of a date and times, after validating the item: - yearstr: typing.Optional[str] = None # Note: before conversion to int - year: typing.Optional[int] = None - monthstr: typing.Optional[str] = None # Note: before conversion to int - month: typing.Optional[int] = None - daystr: typing.Optional[str] = None # Note: before conversion to int - day: typing.Optional[int] = None - hourstr: typing.Optional[str] = None # Note: before conversion to int or float - hour: typing.Optional[int] = None - minutesstr: typing.Optional[str] = None # Note: before conversion to int or float - minutes: typing.Optional[int] = None - secondsstr: typing.Optional[str] = None # Note: before conversion to int or float - seconds: typing.Optional[int] = None - zonestr: typing.Optional[str] = None # Z or [-+]HH or [-+]HHSS or [-+]HH:SS - precisionstr: typing.Optional[str] = None - iso8601extended: typing.Optional[bool] = None # True when hyphens/colons are present. 
+ yearstr: typing.Optional[str] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(str)), default=None) + year: typing.Optional[int] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(int)), default=None) + + monthstr: typing.Optional[str] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(str)), default=None) + month: typing.Optional[int] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(int)), default=None) + + daystr: typing.Optional[str] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(str)), default=None) + day: typing.Optional[int] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(int)), default=None) + + hourstr: typing.Optional[str] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(str)), default=None) + hour: typing.Optional[int] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(int)), default=None) + + minutesstr: typing.Optional[str] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(str)), default=None) + minutes: typing.Optional[int] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(int)), default=None) + + secondsstr: typing.Optional[str] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(str)), default=None) + seconds: typing.Optional[int] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(int)), default=None) + + # Z or [-+]HH or [-+]HHSS or [-+]HH:SS + zonestr: typing.Optional[str] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(str)), default=None) + + precisionstr: typing.Optional[str] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(str)), default=None) + + # True when hyphens/colons are present. 
+ iso8601extended: typing.Optional[bool] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(bool)), default=None) # Offer the contents of a boolean, after validating the item: - truth: typing.Optional[bool] = None + truth: typing.Optional[bool] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(bool)), default=None) + + def to_map(self)->typing.Mapping[str, typing.Union[str, int, float, bool]]: + results: typing.MutableMapping[str, typing.Union[str, int, float, bool]] = { } + results["list_len"] = self.list_len + if self.data_type is not None: + results["data_type"] = self.data_type.name + if self.valid is not None: + results["valid"] = self.valid + if self.contents is not None: + results["contents"] = self.contents + if self.lang is not None: + results["lang"] = self.lang + if self.suffix is not None: + results["suffix"] = self.suffix + if self.numberstr is not None: + results["numberstr"] = self.numberstr + if self.number is not None: + results["number"] = self.number + if self.low_tolerancestr is not None: + results["low_tolerancestr"] = self.low_tolerancestr + if self.high_tolerancestr is not None: + results["high_tolerancestr"] = self.high_tolerancestr + if self.si_units is not None: + results["si_units"] = self.si_units + if self.wikidata_node is not None: + results["wikidata_node"] = self.wikidata_node + if self.latstr is not None: + results["latstr"] = self.latstr + if self.lat is not None: + results["lat"] = self.lat + if self.lonstr is not None: + results["lonstr"] = self.lonstr + if self.lon is not None: + results["lon"] = self.lon + if self.yearstr is not None: + results["yearstr"] = self.yearstr + if self.year is not None: + results["year"] = self.year + if self.monthstr is not None: + results["monthstr"] = self.monthstr + if self.month is not None: + results["month"] = self.month + if self.daystr is not None: + results["daystr"] = self.daystr + if self.day is not None: + results["day"] = self.day + if self.hourstr is not None: + results["hourstr"] = self.hourstr + if self.hour is not None: + results["hour"] = self.hour + if self.minutesstr is not None: + results["minutesstr"] = self.minutesstr + if self.minutes is not None: + results["minutes"] = self.minutes + if self.secondsstr is not None: + results["secondsstr"] = self.secondsstr + if self.seconds is not None: + results["seconds"] = self.seconds + if self.zonestr is not None: + results["zonestr"] = self.zonestr + if self.precisionstr is not None: + results["precisionstr"] = self.precisionstr + if self.iso8601extended is not None: + results["iso8601extended"] = self.iso8601extended + if self.truth is not None: + results["truth"] = self.truth + return results + +@attr.s(slots=True, frozen=False) +class KgtkValue(KgtkFormat): + value: str = attr.ib(validator=attr.validators.instance_of(str)) + options: KgtkValueOptions = attr.ib(validator=attr.validators.instance_of(KgtkValueOptions), default=DEFAULT_KGTK_VALUE_OPTIONS) + parse_fields: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) + + # The current fields when available: + # fields: typing.Optional[KgtkValueFields] = attr.ib(attr.validators.instance_of(KgtkValueFields), default=None, init=False) + fields: typing.Optional[KgtkValueFields] = attr.ib(default=None, init=False) + + # TODO: proper validation. + parent: typing.Optional['KgtkValue'] = attr.ib(default=None) + + # Cache some properties of the value that would be expensive to + # continuously recompute. 
+ data_type: typing.Optional[KgtkFormat.DataType] = None + valid: typing.Optional[bool] = None + + # If this is a list, cache a KgtkValue object for each item of the list. + # + # Note: Please do not access this list directly. Use get_list_items(). + list_items: typing.Optional[typing.List['KgtkValue']] = None def is_valid(self)->bool: # Is this a valid whatever it is? @@ -101,6 +202,8 @@ def is_empty(self, validate: bool = False)->bool: # We are certain that this is an empty value. We can be certain it is valid. self.data_type = KgtkFormat.DataType.EMPTY self.valid = True + if self.parse_fields: + self.fields = KgtkValueFields(data_type=self.data_type, valid=self.valid) return True split_list_re: typing.Pattern = re.compile(r"(?bool: if self.valid is not None: return self.valid + # We will save the list length even if invalid. + if self.parse_fields: + self.fields = KgtkValueFields(data_type=KgtkFormat.DataType.LIST, + valid=False, + list_len=len(self.get_list_items())) + # Validate the list. item: 'KgtkValue' for item in self.get_list_items(): @@ -156,6 +265,10 @@ def is_list(self, validate: bool = False)->bool: # This is a valid list. self.valid = True + if self.parse_fields: + self.fields = KgtkValueFields(data_type=KgtkFormat.DataType.LIST, + valid=self.valid, + list_len=len(self.get_list_items())) return True def rebuild_list(self): @@ -169,7 +282,6 @@ def rebuild_list(self): for item in list_items: values.append(item.value) self.value = KgtkFormat.LIST_SEPARATOR.join(values) - def _is_number_or_quantity(self)->bool: return self.value.startswith(("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "+", "-", ".")) @@ -275,21 +387,8 @@ def is_number_or_quantity(self, validate: bool=False)->bool: elif self.data_type == KgtkFormat.DataType.QUANTITY: return self.is_quantity(validate=validate) else: - # Clear the number or quantity components: - self.numberstr = None - self.low_tolerancestr = None - self.high_tolerancestr = None - self.si_units = None - self.wikidata_node = None return False # Not a number or quantity. - # Clear the number or quantity components: - self.numberstr = None - self.low_tolerancestr = None - self.high_tolerancestr = None - self.si_units = None - self.wikidata_node = None - if not self._is_number_or_quantity(): return False @@ -304,24 +403,25 @@ def is_number_or_quantity(self, validate: bool=False)->bool: return False # Extract the number or quantity components: - self.numberstr = m.group("number") - self.low_tolerancestr = m.group("low_tolerance") - self.high_tolerancestr = m.group("high_tolerance") - self.si_units = m.group("si_units") - self.wikidata_node = m.group("wikidata_node") + numberstr: str = m.group("number") + low_tolerancestr: str = m.group("low_tolerance") + high_tolerancestr: str = m.group("high_tolerance") + si_units: str = m.group("si_units") + wikidata_node: str = m.group("wikidata_node") # For convenience, convert the numeric part to int or float: # # TODO: go to this extra work only when requested? - if self.numberstr is None: + if numberstr is None: raise ValueError("Missing numeric part") - n: str = self.numberstr.lower() + n: str = numberstr.lower() + number: typing.Union[float, int] if "." 
in n or ("e" in n and not n.startswith("0x")): - self.number = float(n) + number = float(n) else: - self.number = int(n) + number = int(n) - if self.low_tolerancestr is not None or self.high_tolerancestr is not None or self.si_units is not None or self.wikidata_node is not None: + if low_tolerancestr is not None or high_tolerancestr is not None or si_units is not None or wikidata_node is not None: # We can be certain that this is a quantity. self.data_type = KgtkFormat.DataType.QUANTITY else: @@ -329,6 +429,15 @@ def is_number_or_quantity(self, validate: bool=False)->bool: self.data_type = KgtkFormat.DataType.NUMBER self.valid = True + if self.parse_fields: + self.fields = KgtkValueFields(data_type=self.data_type, + valid=self.valid, + numberstr=numberstr, + number=number, + low_tolerancestr=low_tolerancestr, + high_tolerancestr=high_tolerancestr, + si_units=si_units, + wikidata_node=wikidata_node) return True def is_number(self, validate: bool=False)->bool: @@ -373,22 +482,28 @@ def is_number(self, validate: bool=False)->bool: return False # Extract the number components: - self.numberstr = m.group("number") + numberstr: str = m.group("number") # For convenience, convert the numeric part to int or float: # # TODO: go to this extra work only when requested? - if self.numberstr is None: + if numberstr is None: raise ValueError("Missing numeric part") - n: str = self.numberstr.lower() + n: str = numberstr.lower() + number: typing.Union[float, int] if "." in n or ("e" in n and not n.startswith("0x")): - self.number = float(n) + number = float(n) else: - self.number = int(n) + number = int(n) # Now we can be certain that this is a number. self.data_type = KgtkFormat.DataType.NUMBER self.valid = True + if self.parse_fields: + self.fields = KgtkValueFields(data_type=self.data_type, + valid=self.valid, + numberstr=numberstr, + number=number) return True @@ -399,12 +514,6 @@ def is_quantity(self, validate: bool=False)->bool: """ if self.data_type is not None: if self.data_type != KgtkFormat.DataType.QUANTITY: - # Clear the quantity components: - self.numberstr = None - self.low_tolerancestr = None - self.high_tolerancestr = None - self.si_units = None - self.wikidata_node = None return False if not validate: @@ -412,13 +521,6 @@ def is_quantity(self, validate: bool=False)->bool: if self.valid is not None: return self.valid - # Clear the quantity components: - self.numberstr = None - self.low_tolerancestr = None - self.high_tolerancestr = None - self.si_units = None - self.wikidata_node = None - if not self._is_number_or_quantity(): return False # We don't know yet if this is a quantity. It could be a number. @@ -428,36 +530,51 @@ def is_quantity(self, validate: bool=False)->bool: return False # Extract the quantity components: - self.numberstr = m.group("number") - self.low_tolerancestr = m.group("low_tolerance") - self.high_tolerancestr = m.group("high_tolerance") - self.si_units = m.group("si_units") - self.wikidata_node = m.group("wikidata_node") + numberstr:str = m.group("number") + low_tolerancestr:str = m.group("low_tolerance") + high_tolerancestr:str = m.group("high_tolerance") + si_units:str = m.group("si_units") + wikidata_node:str = m.group("wikidata_node") # For convenience, convert the numeric part to int or float: # # TODO: go to this extra work only when requested? - if self.numberstr is None: + if numberstr is None: raise ValueError("Missing numeric part") - n: str = self.numberstr.lower() + n: str = numberstr.lower() + number: typing.Union[float, int] if "." 
in n or ("e" in n and not n.startswith("0x")): - self.number = float(n) + number = float(n) else: - self.number = int(n) + number = int(n) - if self.low_tolerancestr is None and self.high_tolerancestr is None and self.si_units is None and self.wikidata_node is None: + if low_tolerancestr is None and high_tolerancestr is None and si_units is None and wikidata_node is None: # This is a number, not a quantity self.data_type = KgtkFormat.DataType.NUMBER self.valid = True + if self.parse_fields: + self.fields = KgtkValueFields(data_type=self.data_type, + valid=self.valid, + numberstr=numberstr, + number=number) return False # Now we can be certain that this is a quantity. self.data_type = KgtkFormat.DataType.QUANTITY self.valid = True + if self.parse_fields: + self.fields = KgtkValueFields(data_type=self.data_type, + valid=self.valid, + numberstr=numberstr, + number=number, + low_tolerancestr=low_tolerancestr, + high_tolerancestr=high_tolerancestr, + si_units=si_units, + wikidata_node=wikidata_node) return True lax_string_re: typing.Pattern = re.compile(r'^"(?P.*)"$') - strict_string_re: typing.Pattern = re.compile(r'^"(?P(?:[^"\\]|\\.)*"$)') + strict_string_re: typing.Pattern = re.compile(r'^"(?P(?:[^"\\]|\\.)*)"$') def is_string(self, validate: bool = False)->bool: """ @@ -470,15 +587,11 @@ def is_string(self, validate: bool = False)->bool: """ if self.data_type is None: if not self.value.startswith('"'): - # Clear the string components: - self.contents = None return False # We are certain this is a string. We don't yet know if it is valid. self.data_type = KgtkFormat.DataType.STRING else: if self.data_type != KgtkFormat.DataType.STRING: - # Clear the string components: - self.contents = None return False if not validate: @@ -486,9 +599,6 @@ def is_string(self, validate: bool = False)->bool: if self.valid is not None: return self.valid - # Clear the string components: - self.contents = None - # Validate the string: m: typing.Optional[typing.Match] if self.options.allow_lax_strings: @@ -498,11 +608,12 @@ def is_string(self, validate: bool = False)->bool: if m is None: return False - # Extract the contents components: - self.contents = m.group("contents") - # We are certain that this is a valid string. self.valid = True + if self.parse_fields: + self.fields = KgtkValueFields(data_type=KgtkFormat.DataType.STRING, + valid=self.valid, + contents=m.group("contents")) return True def is_structured_literal(self)->bool: @@ -527,30 +638,31 @@ def is_symbol(self, validate: bool = False)->bool: # We are certain this is a symbol. We assume that it is valid. self.data_type = KgtkFormat.DataType.SYMBOL self.valid = True + if self.parse_fields: + self.fields = KgtkValueFields(data_type=self.data_type, + valid=self.valid) return True def is_boolean(self, validate: bool = False)->bool: """ Return True if the value matches one of the special boolean symbols. - The validate parameter is ignored. + The validate parameter is ignored, we always validate. """ if self.data_type is not None: - if self.data_type != KgtkFormat.DataType.BOOLEAN: - self.truth = None - return False - self.truth = self.value == KgtkFormat.TRUE_SYMBOL - return True + return self.data_type == KgtkFormat.DataType.BOOLEAN # Is this a boolean? if self.value != KgtkFormat.TRUE_SYMBOL and self.value != KgtkFormat.FALSE_SYMBOL: - self.truth = None return False # We are certain this is a valid boolean. 
self.data_type = KgtkFormat.DataType.BOOLEAN self.valid = True - self.truth = self.value == KgtkFormat.TRUE_SYMBOL + if self.parse_fields: + self.fields = KgtkValueFields(data_type=self.data_type, + valid=self.valid, + truth=self.value == KgtkFormat.TRUE_SYMBOL) return True # Support two or three character language codes. Suports hyphenated codes @@ -564,19 +676,11 @@ def is_language_qualified_string(self, validate: bool=False)->bool: """ if self.data_type is None: if not self.value.startswith("'"): - # Clear the cached components of the language qualified string: - self.contents = None - self.lang = None - self.suffix = None return False # We are certain that this is a language qualified string, although we haven't checked validity. self.data_type = KgtkFormat.DataType.LANGUAGE_QUALIFIED_STRING else: if self.data_type != KgtkFormat.DataType.LANGUAGE_QUALIFIED_STRING: - # Clear the cached components of the language qualified string: - self.contents = None - self.lang = None - self.suffix = None return False if not validate: @@ -584,11 +688,6 @@ def is_language_qualified_string(self, validate: bool=False)->bool: if self.valid is not None: return self.valid - # Clear the cached components of the language qualified string: - self.contents = None - self.lang = None - self.suffix = None - # Validate the language qualified string. # print("checking %s" % self.value) m: typing.Optional[typing.Match] @@ -600,22 +699,23 @@ def is_language_qualified_string(self, validate: bool=False)->bool: # print("match failed for %s" % self.value) return False - # Extract the contents, lang, and optional suffix components: - self.contents = m.group("contents") - self.lang = m.group("lang") - self.suffix = m.group("suffix") - # Extract the combined lang and suffix for use by the LanguageValidator. - lang_suffix: str = m.group("lang_suffix") - # print("lang: %s" % lang_suffix) + lang_and_suffix: str = m.group("lang_suffix") + # print("lang_and_suffix: %s" % lang_and_suffix) # Validate the language code: - if not LanguageValidator.validate(lang_suffix.lower(), options=self.options): + if not LanguageValidator.validate(lang_and_suffix.lower(), options=self.options): # print("language validation failed for %s" % self.value) return False # We are certain that this is a valid language qualified string. self.valid = True + if self.parse_fields: + self.fields = KgtkValueFields(data_type=KgtkFormat.DataType.LANGUAGE_QUALIFIED_STRING, + valid=self.valid, + contents=m.group("contents"), + lang=m.group("lang"), + suffix=m.group("suffix")) return True #location_coordinates_re: typing.Pattern = re.compile(r"^@(?P[-+]?\d{3}\.\d{5})/(?P[-+]?\d{3}\.\d{5})$") @@ -631,19 +731,11 @@ def is_location_coordinates(self, validate: bool=False)->bool: """ if self.data_type is None: if not self.value.startswith("@"): - self.latstr = None - self.lat = None - self.lonstr = None - self.lon = None return False # We are certain that this is location coordinates, although we haven't checked validity. 
self.data_type = KgtkFormat.DataType.LOCATION_COORDINATES else: if self.data_type != KgtkFormat.DataType.LOCATION_COORDINATES: - self.latstr = None - self.lat = None - self.lonstr = None - self.lon = None return False if not validate: @@ -651,40 +743,39 @@ def is_location_coordinates(self, validate: bool=False)->bool: if self.valid is not None: return self.valid - # Clear the lat/lon components: - self.latstr = None - self.lat = None - self.lonstr = None - self.lon = None - # Validate the location coordinates: m: typing.Optional[typing.Match] = KgtkValue.location_coordinates_re.match(self.value) if m is None: return False latstr: str = m.group("lat") - self.latstr = latstr lonstr: str = m.group("lon") - self.lonstr = lonstr # Latitude normally runs from -90 to +90: try: - self.lat = float(latstr) - if self.lat < self.options.minimum_valid_lat or self.lat > self.options.maximum_valid_lat: + lat: float = float(latstr) + if lat < self.options.minimum_valid_lat or lat > self.options.maximum_valid_lat: return False except ValueError: return False # Longitude normally runs from -180 to +180: try: - self.lon = float(lonstr) - if self.lon < self.options.minimum_valid_lon or self.lon > self.options.maximum_valid_lon: + lon: float = float(lonstr) + if lon < self.options.minimum_valid_lon or lon > self.options.maximum_valid_lon: return False except ValueError: return False # We are certain that this is valid. self.valid = True + if self.parse_fields: + self.fields = KgtkValueFields(data_type=KgtkFormat.DataType.LOCATION_COORDINATES, + valid=self.valid, + latstr=latstr, + lat=lat, + lonstr=lonstr, + lon=lon) return True # https://en.wikipedia.org/wiki/ISO_8601 @@ -780,43 +871,11 @@ def is_date_and_times(self, validate: bool=False)->bool: """ if self.data_type is None: if not self.value.startswith("^"): - # Clear the cached date and times components: - self.yearstr = None - self.monthstr = None - self.daystr = None - self.hourstr = None - self.minutesstr = None - self.secondsstr = None - self.year = None - self.month = None - self.day = None - self.hour = None - self.minutes = None - self.seconds = None - self.zonestr = None - self.precisionstr = None - self.iso8601extended = None return False # We are certain that this is location coordinates, although we haven't checked validity. 
self.data_type = KgtkFormat.DataType.DATE_AND_TIMES else: if self.data_type != KgtkFormat.DataType.DATE_AND_TIMES: - # Clear the cached date and times components: - self.yearstr = None - self.monthstr = None - self.daystr = None - self.hourstr = None - self.minutesstr = None - self.secondsstr = None - self.year = None - self.month = None - self.day = None - self.hour = None - self.minutes = None - self.seconds = None - self.zonestr = None - self.precisionstr = None - self.iso8601extended = None return False if not validate: @@ -825,105 +884,109 @@ def is_date_and_times(self, validate: bool=False)->bool: return self.valid # Clear the cached date and times components: - self.yearstr = None - self.monthstr = None - self.daystr = None - self.hourstr = None - self.minutesstr = None - self.secondsstr = None - self.year = None - self.month = None - self.day = None - self.hour = None - self.minutes = None - self.seconds = None - self.zonestr = None - self.precisionstr = None - self.iso8601extended = None # Validate the date and times: m: typing.Optional[typing.Match] = KgtkValue.lax_date_and_times_re.match(self.value) if m is None: return False - self.yearstr = m.group("year") - self.monthstr = m.group("month") - self.daystr = m.group("day") - self.hourstr = m.group("hour") - self.minutesstr = m.group("minutes") - self.secondsstr = m.group("seconds") - self.zonestr = m.group("zone") - self.precisionstr = m.group("precision") - self.iso8601extended = m.group("hyphen") is not None + yearstr: str = m.group("year") + monthstr: str = m.group("month") + daystr: str = m.group("day") + hourstr: str = m.group("hour") + minutesstr: str = m.group("minutes") + secondsstr: str = m.group("seconds") + zonestr: str = m.group("zone") + precisionstr: str = m.group("precision") + iso8601extended: bool = m.group("hyphen") is not None fixup_needed: bool = False # Validate the year: - if self.yearstr is None or len(self.yearstr) == 0: + if yearstr is None or len(yearstr) == 0: return False # Years are mandatory try: - self.year: int = int(self.yearstr) + year: int = int(yearstr) except ValueError: return False - if self.year < self.options.minimum_valid_year: + if year < self.options.minimum_valid_year: return False - if self.year > self.options.maximum_valid_year: + if year > self.options.maximum_valid_year: return False - if self.monthstr is not None: + if monthstr is not None: try: - self.month: int = int(self.monthstr) + month: int = int(monthstr) except ValueError: return False # shouldn't happen - if self.month == 0: + if month == 0: if self.options.repair_month_or_day_zero: - self.month = 1 - self.monthstr = "01" + month = 1 + monthstr = "01" fixup_needed = True elif not self.options.allow_month_or_day_zero: return False # month 0 was disallowed. - if self.daystr is not None: + if daystr is not None: try: - self.day: int = int(self.daystr) + day: int = int(daystr) except ValueError: return False # shouldn't happen - if self.day == 0: + if day == 0: if self.options.repair_month_or_day_zero: - self.day = 1 - self.daystr = "01" + day = 1 + daystr = "01" fixup_needed = True elif not self.options.allow_month_or_day_zero: return False # day 0 was disallowed. 
# Convert the time fields to ints: - if self.hourstr is not None: + if hourstr is not None: try: - self.hour: int = int(self.hourstr) + hour: int = int(hourstr) except ValueError: return False # shouldn't happen - if self.minutesstr is not None: + if minutesstr is not None: try: - self.minutes: int = int(self.minutesstr) + minutes: int = int(minutesstr) except ValueError: return False # shouldn't happen - if self.secondsstr is not None: + if secondsstr is not None: try: - self.seconds: int = int(self.secondsstr) + seconds: int = int(secondsstr) except ValueError: return False # shouldn't happen if fixup_needed: - # Rapair a month or day zero problem. If this value is the child - #of a list, repair the list parent value, too. + # Repair a month or day zero problem. If this value is the child + # of a list, repair the list parent value, too. self.update_date_and_times() if self.parent is not None: self.parent.rebuild_list() # We are fairly certain that this is a valid date and times. self.valid = True + if self.parse_fields: + self.fields = KgtkValueFields(data_type=KgtkFormat.DataType.DATE_AND_TIMES, + valid=self.valid, + yearstr=yearstr, + monthstr=monthstr, + daystr=daystr, + hourstr=hourstr, + minutesstr=minutesstr, + secondsstr=secondsstr, + year=year, + month=month, + day=day, + hour=hour, + minutes=minutes, + seconds=seconds, + zonestr=zonestr, + precisionstr=precisionstr, + iso8601extended=iso8601extended, + ) return True def update_date_and_times(self): @@ -1019,6 +1082,7 @@ def reclassify(self)->KgtkFormat.DataType: # Classify this KgtkValue into a KgtkDataType, ignoring any cached data_type. self.data_type = None self.valid = None + self.fields = None return self.classify() def validate(self)->bool: @@ -1030,6 +1094,9 @@ def validate(self)->bool: # If the valid flag has already been cached, return that. if self.valid is not None: return self.valid + + # Clear any fields from prior validation: + self.fields = None # Validate the value. 
if dt == KgtkFormat.DataType.EMPTY: @@ -1062,6 +1129,7 @@ def revalidate(self, reclassify: bool=False)->bool: if reclassify: self.data_type = None self.valid = None + self.fields = None return self.validate() def describe(self)->str: @@ -1089,7 +1157,7 @@ def describe(self)->str: elif dt == KgtkFormat.DataType.STRING: return "String" if self.is_string(validate=True) else "Invalid String" elif dt == KgtkFormat.DataType.LANGUAGE_QUALIFIED_STRING: - return "Language Qualified String (%s)" % self.lang if self.is_language_qualified_string(validate=True) else "Invalid Language Qualified String" + return "Language Qualified String" if self.is_language_qualified_string(validate=True) else "Invalid Language Qualified String" elif dt == KgtkFormat.DataType.LOCATION_COORDINATES: return "Location Coordinates" if self.is_location_coordinates(validate=True) else "Invalid Location Coordinates" elif dt == KgtkFormat.DataType.DATE_AND_TIMES: @@ -1103,79 +1171,19 @@ def describe(self)->str: else: return "Unknown" - def get_fields(self)->typing.Mapping[str, typing.Union[str, int, float, bool]]: - results: typing.MutableMapping[str, typing.Union[str, int, float, bool]] = { } - if self.data_type is not None: - results["data_type"] = str(self.data_type) - if self.valid is not None: - results["valid"] = self.valid - if self.contents is not None: - results["contents"] = self.contents - if self.lang is not None: - results["lang"] = self.lang - if self.suffix is not None: - results["suffix"] = self.suffix - if self.numberstr is not None: - results["numberstr"] = self.numberstr - if self.number is not None: - results["number"] = self.number - if self.low_tolerancestr is not None: - results["low_tolerancestr"] = self.low_tolerancestr - if self.high_tolerancestr is not None: - results["high_tolerancestr"] = self.high_tolerancestr - if self.si_units is not None: - results["si_units"] = self.si_units - if self.wikidata_node is not None: - results["wikidata_node"] = self.wikidata_node - if self.latstr is not None: - results["latstr"] = self.latstr - if self.lat is not None: - results["lat"] = self.lat - if self.lonstr is not None: - results["lonstr"] = self.lonstr - if self.lon is not None: - results["lon"] = self.lon - if self.yearstr is not None: - results["yearstr"] = self.yearstr - if self.year is not None: - results["year"] = self.year - if self.monthstr is not None: - results["monthstr"] = self.monthstr - if self.month is not None: - results["month"] = self.month - if self.daystr is not None: - results["daystr"] = self.daystr - if self.day is not None: - results["day"] = self.day - if self.hourstr is not None: - results["hourstr"] = self.hourstr - if self.hour is not None: - results["hour"] = self.hour - if self.minutesstr is not None: - results["minutesstr"] = self.minutesstr - if self.minutes is not None: - results["minutes"] = self.minutes - if self.secondsstr is not None: - results["secondsstr"] = self.secondsstr - if self.seconds is not None: - results["seconds"] = self.seconds - if self.zonestr is not None: - results["zonestr"] = self.zonestr - if self.precisionstr is not None: - results["precisionstr"] = self.precisionstr - if self.iso8601extended is not None: - results["iso8601extended"] = self.iso8601extended - list_items: typing.List[KgtkValue] = self.get_list_items() - if len(list_items) > 0: - results["list_len"] = len(list_items) - return results - + def get_field_map(self)->typing.Mapping[str, typing.Union[str, int, float, bool]]: + if self.fields is None: + return { } + else: + return 
self.fields.to_map()
+
 def main():
     """
     Test the KGTK value parser.
     """
     parser: ArgumentParser = ArgumentParser()
     parser.add_argument(dest="values", help="The values(s) to test", type=str, nargs="+")
+    parser.add_argument("-p", "--parse-fields", dest="parse_fields", help="Parse each value into fields.", action='store_true')
     parser.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true')
     parser.add_argument(      "--very-verbose", dest="very_verbose", help="Print additional progress messages.", action='store_true')
     KgtkValueOptions.add_arguments(parser)
@@ -1186,7 +1194,7 @@ def main():
 
     value: str
     for value in args.values:
-        kv: KgtkValue = KgtkValue(value, options=value_options)
+        kv: KgtkValue = KgtkValue(value, options=value_options, parse_fields=args.parse_fields)
         kv.validate()
         if value == kv.value:
             print("%s: %s" % (value, kv.describe()), flush=True)
@@ -1194,7 +1202,7 @@ def main():
             print("%s => %s: %s" % (value, kv.value, kv.describe()), flush=True)
 
         if args.verbose:
-            fields = kv.get_fields()
+            fields: typing.Mapping[str, typing.Any] = kv.get_field_map()
             for key in sorted(fields.keys()):
                 print("%s: %s" % (key, str(fields[key])))
             list_items: typing.List[KgtkValue] = kv.get_list_items()
diff --git a/kgtk/value/kgtkvalueoptions.py b/kgtk/value/kgtkvalueoptions.py
index d37a99243..a51b16dc7 100644
--- a/kgtk/value/kgtkvalueoptions.py
+++ b/kgtk/value/kgtkvalueoptions.py
@@ -9,8 +9,9 @@
 @attr.s(slots=True, frozen=True)
 class KgtkValueOptions:
     """
-    These options will affect some aspects of value processing. They are in a
-    seperate class for efficiency.
+    These options control various aspects of value processing. They are in a
+    separate class for code isolation and efficiency.
+
     """
 
     # Allow month 00 or day 00 in dates? This isn't really allowed by ISO
From 44124f07c147b8d852a1023d827e15fbb50c2f29 Mon Sep 17 00:00:00 2001
From: Craig Milo Rogers
Date: Fri, 8 May 2020 16:07:02 -0700
Subject: [PATCH 129/278] Adapt to new KgtkValueOptions initialization.

---
 kgtk/cli/ifexists.py    | 22 ++--------------------
 kgtk/cli/ifnotexists.py | 22 ++--------------------
 kgtk/cli/validate.py    | 20 ++------------------
 3 files changed, 6 insertions(+), 58 deletions(-)

diff --git a/kgtk/cli/ifexists.py b/kgtk/cli/ifexists.py
index d72f320a6..d6c2a8b23 100644
--- a/kgtk/cli/ifexists.py
+++ b/kgtk/cli/ifexists.py
@@ -75,17 +75,7 @@ def run(input_kgtk_file: typing.Optional[Path],
         verbose: bool = False,
         very_verbose: bool = False,
 
-        # Arguments from KgtkValueOptions:
-        additional_language_codes: typing.Optional[typing.List[str]] = None,
-        allow_language_suffixes: bool = False,
-        allow_lax_strings: bool = False,
-        allow_lax_lq_strings: bool = False,
-        allow_month_or_day_zero: bool = False,
-        repair_month_or_day_zero: bool = False,
-        minimum_valid_year: int = KgtkValueOptions.MINIMUM_VALID_YEAR,
-        maximum_valid_year: int = KgtkValueOptions.MAXIMUM_VALID_YEAR,
-        escape_list_separators: bool = False,
-
+        **kwargs # Whatever KgtkValueOptions wants.
 )->int:
     # import modules locally
     from kgtk.exceptions import KGTKException
@@ -99,15 +89,7 @@ def run(input_kgtk_file: typing.Optional[Path],
     error_file: typing.TextIO = sys.stderr if errors_to_stderr else sys.stdout
 
     # Build the value parsing option structure.
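
The refactoring pattern adopted in this patch is worth spelling out. Below is a minimal sketch under hypothetical names (the real classes live in kgtk/value/kgtkvalueoptions.py): each command's run() stops enumerating every value option and instead forwards **kwargs to a from_dict() factory, so a new option only has to be added in one place.

import typing

class MiniValueOptions:
    def __init__(self, allow_month_or_day_zero: bool = False, minimum_valid_year: int = 1583):
        self.allow_month_or_day_zero = allow_month_or_day_zero
        self.minimum_valid_year = minimum_valid_year

    @classmethod
    def from_dict(cls, d: typing.Mapping[str, typing.Any]) -> "MiniValueOptions":
        # Pick out only the keys this class understands and ignore the rest.
        return cls(allow_month_or_day_zero=d.get("allow_month_or_day_zero", False),
                   minimum_valid_year=d.get("minimum_valid_year", 1583))

def run(input_file: str, verbose: bool = False, **kwargs) -> int:
    # Whatever MiniValueOptions wants rides along in kwargs.
    options = MiniValueOptions.from_dict(kwargs)
    return 0 if options.minimum_valid_year > 0 else 1

run("example.tsv", verbose=True, minimum_valid_year=1900)
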
- value_options: KgtkValueOptions = KgtkValueOptions(allow_month_or_day_zero=allow_month_or_day_zero, - repair_month_or_day_zero=repair_month_or_day_zero, - allow_lax_strings=allow_lax_strings, - allow_lax_lq_strings=allow_lax_lq_strings, - allow_language_suffixes=allow_language_suffixes, - additional_language_codes=additional_language_codes, - minimum_valid_year=minimum_valid_year, - maximum_valid_year=maximum_valid_year, - escape_list_separators=escape_list_separators) + value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs) try: ie: IfExists = IfExists(left_file_path=input_kgtk_file, diff --git a/kgtk/cli/ifnotexists.py b/kgtk/cli/ifnotexists.py index 69bbbff61..bd9f5a52b 100644 --- a/kgtk/cli/ifnotexists.py +++ b/kgtk/cli/ifnotexists.py @@ -76,17 +76,7 @@ def run(input_kgtk_file: typing.Optional[Path], verbose: bool = False, very_verbose: bool = False, - # Arguments from KgtkValueOptions: - additional_language_codes: typing.Optional[typing.List[str]] = None, - allow_language_suffixes: bool = False, - allow_lax_strings: bool = False, - allow_lax_lq_strings: bool = False, - allow_month_or_day_zero: bool = False, - repair_month_or_day_zero: bool = False, - minimum_valid_year: int = KgtkValueOptions.MINIMUM_VALID_YEAR, - maximum_valid_year: int = KgtkValueOptions.MAXIMUM_VALID_YEAR, - escape_list_separators: bool = False, - + **kwargs # Whatever KgtkValueOptions wants. )->int: # import modules locally from kgtk.exceptions import KGTKException @@ -100,15 +90,7 @@ def run(input_kgtk_file: typing.Optional[Path], error_file: typing.TextIO = sys.stderr if errors_to_stderr else sys.stdout # Build the value parsing option structure. - value_options: KgtkValueOptions = KgtkValueOptions(allow_month_or_day_zero=allow_month_or_day_zero, - repair_month_or_day_zero=repair_month_or_day_zero, - allow_lax_strings=allow_lax_strings, - allow_lax_lq_strings=allow_lax_lq_strings, - allow_language_suffixes=allow_language_suffixes, - additional_language_codes=additional_language_codes, - minimum_valid_year=minimum_valid_year, - maximum_valid_year=maximum_valid_year, - escape_list_separators=escape_list_separators) + value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs) try: ie: IfExists = IfExists(left_file_path=input_kgtk_file, diff --git a/kgtk/cli/validate.py b/kgtk/cli/validate.py index a674baac0..ccf442bc0 100644 --- a/kgtk/cli/validate.py +++ b/kgtk/cli/validate.py @@ -148,15 +148,6 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], invalid_value_action: ValidationAction = ValidationAction.REPORT, header_error_action: ValidationAction = ValidationAction.EXIT, unsafe_column_name_action: ValidationAction = ValidationAction.REPORT, - additional_language_codes: typing.Optional[typing.List[str]] = None, - allow_language_suffixes: bool = False, - allow_lax_strings: bool = False, - allow_lax_lq_strings: bool = False, - allow_month_or_day_zero: bool = False, - repair_month_or_day_zero: bool = False, - escape_list_separators: bool = False, - minimum_valid_year: int = KgtkValueOptions.MINIMUM_VALID_YEAR, - maximum_valid_year: int = KgtkValueOptions.MAXIMUM_VALID_YEAR, compression_type: typing.Optional[str] = None, gzip_in_parallel: bool = False, gzip_queue_size: int = KgtkReader.GZIP_QUEUE_SIZE_DEFAULT, @@ -165,6 +156,7 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], header_only: bool = False, verbose: bool = False, very_verbose: bool = False, + **kwargs # Whatever KgtkValueOptions wants. 
)->int:
     # import modules locally
     from kgtk.exceptions import KGTKException
@@ -176,15 +168,7 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]],
     error_file: typing.TextIO = sys.stderr if errors_to_stderr else sys.stdout
 
     # Build the value parsing option structure.
-    value_options: KgtkValueOptions = KgtkValueOptions(allow_month_or_day_zero=allow_month_or_day_zero,
-                                                       repair_month_or_day_zero=repair_month_or_day_zero,
-                                                       allow_lax_strings=allow_lax_strings,
-                                                       allow_lax_lq_strings=allow_lax_lq_strings,
-                                                       allow_language_suffixes=allow_language_suffixes,
-                                                       additional_language_codes=additional_language_codes,
-                                                       minimum_valid_year=minimum_valid_year,
-                                                       maximum_valid_year=maximum_valid_year,
-                                                       escape_list_separators=escape_list_separators)
+    value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs)
 
     try:
         kgtk_file: typing.Optional[Path]
From 03d5f85426a4bd5e1cb951215c830e25010bb504 Mon Sep 17 00:00:00 2001
From: ckxz105
Date: Fri, 8 May 2020 17:37:55 -0700
Subject: [PATCH 130/278] update embedding sentence generating algorithm - for
 property-values, move to isa-properties part

---
 kgtk/gt/embedding_utils.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/kgtk/gt/embedding_utils.py b/kgtk/gt/embedding_utils.py
index 429d37b47..505636bbf 100644
--- a/kgtk/gt/embedding_utils.py
+++ b/kgtk/gt/embedding_utils.py
@@ -460,16 +460,17 @@ def read_input(self, file_path: str, target_properties: dict, property_labels_di
 
                 if node_property in properties_reversed:
                     roles = properties_reversed[node_property]
+                    node_value = self.get_real_label_name(node_value)
+                    # if we get property_values, it should be saved to isa-properties part
                     if "property_values" in roles:
                         # for property values part, changed to be "{property} {value}"
-                        node_value = self.get_real_label_name(node_property) + " " + self.get_real_label_name(node_value)
-                    else:
-                        node_value = self.get_real_label_name(node_value)
+                        node_value_combine = self.get_real_label_name(node_property) + " " + self.get_real_label_name(node_value)
+                        each_node_attributes["isa_properties"].append(node_value_combine)
+                        # remove these 2 roles so we do not use this node a second time later
+                        roles.discard("property_values")
+                        roles.discard("has_properties")
                     for each_role in roles:
-                        if each_role == "property_values" and "has_properties" not in roles:
-                            each_node_attributes["has_properties"].append(node_value)
-                        else:
-                            each_node_attributes[each_role].append(node_value)
+                        each_node_attributes[each_role].append(node_value)
                 elif add_all_properties:  # add remained properties if need all properties
                     each_node_attributes["has_properties"].append(self.get_real_label_name(node_property))
From b9ef7079c35371642220a36a9c12a9cdd396929b Mon Sep 17 00:00:00 2001
From: ckxz105
Date: Fri, 8 May 2020 18:42:45 -0700
Subject: [PATCH 131/278] bug fix on property-value part

---
 kgtk/gt/embedding_utils.py | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/kgtk/gt/embedding_utils.py b/kgtk/gt/embedding_utils.py
index 505636bbf..8a4b2b833 100644
--- a/kgtk/gt/embedding_utils.py
+++ b/kgtk/gt/embedding_utils.py
@@ -407,7 +407,7 @@ def read_input(self, file_path: str, target_properties: dict, property_labels_di
         self._logger.debug(str(column_references))
         # read contents
         each_node_attributes = {"has_properties": [], "isa_properties": [], "label_properties": [],
-                                "description_properties": []}
+                                "description_properties": [], "has_properties_values": []}
         current_process_node_id = None
         if
self._parallel_count > 1: @@ -454,18 +454,18 @@ def read_input(self, file_path: str, target_properties: dict, property_labels_di # after write down finish, we can clear and start parsing next one each_node_attributes = {"has_properties": [], "isa_properties": [], "label_properties": [], - "description_properties": []} + "description_properties": [], "has_properties_values": []} # update to new id current_process_node_id = node_id if node_property in properties_reversed: - roles = properties_reversed[node_property] + roles = properties_reversed[node_property].copy() node_value = self.get_real_label_name(node_value) # if we get property_values, it should be saved to isa-properties part if "property_values" in roles: # for property values part, changed to be "{property} {value}" node_value_combine = self.get_real_label_name(node_property) + " " + self.get_real_label_name(node_value) - each_node_attributes["isa_properties"].append(node_value_combine) + each_node_attributes["has_properties_values"].append(node_value_combine) # remove those 2 roles in case we have duplicate using of this node later roles.discard("property_values") roles.discard("has_properties") @@ -514,11 +514,21 @@ def attribute_to_sentence(self, attribute_dict: dict, node_id=None): each = each.replace("||", " ") temp += each + ", " if concated_sentence != "" and temp != "": - concated_sentence += " is a " + concated_sentence += " is " elif concated_sentence == "": - concated_sentence += "It is a " + concated_sentence += "It is " # remove last ", " concated_sentence += temp[:-2] + if "has_properties_values" in attribute_dict and len(attribute_dict["has_properties_values"]) > 0: + temp = [self.get_real_label_name(each) for each in attribute_dict["has_properties_values"]] + if concated_sentence != "": + if not have_isa_properties: + concated_sentence += " is " + else: + concated_sentence += ", " + else: + concated_sentence += "It is " + concated_sentence += " and ".join(temp) if "has_properties" in attribute_dict and len(attribute_dict["has_properties"]) > 0: temp = [self.get_real_label_name(each) for each in attribute_dict["has_properties"]] if concated_sentence != "" and temp[0] != "": @@ -529,6 +539,9 @@ def attribute_to_sentence(self, attribute_dict: dict, node_id=None): elif temp[0] != "": concated_sentence += "It has " concated_sentence += " and ".join(temp) + # add ending period + if concated_sentence != "": + concated_sentence += "." 
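
To make the new sentence template concrete, here is a hedged, self-contained miniature of the assembly logic above (hypothetical helper; the real method also folds in labels, descriptions, and "||"-separated values):

def mini_sentence(isa_properties, has_properties_values, has_properties):
    sentence = ""
    if isa_properties:
        sentence += "It is " + ", ".join(isa_properties)
    if has_properties_values:
        # property-value pairs read as "{property} {value}"
        sentence += (", " if sentence else "It is ") + " and ".join(has_properties_values)
    if has_properties:
        sentence += (" and has " if sentence else "It has ") + " and ".join(has_properties)
    if sentence:
        sentence += "."  # add the ending period
    return sentence

# mini_sentence(["human"], ["occupation singer"], ["spouse"])
# -> 'It is human, occupation singer and has spouse.'
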
self._logger.debug("Transform node {} --> {}".format(node_id, concated_sentence)) return concated_sentence From a75c95b7446c214c1d99e5a52f7e10020f780743 Mon Sep 17 00:00:00 2001 From: Naren Date: Sat, 9 May 2020 13:07:04 -0700 Subject: [PATCH 132/278] bug fix --- kgtk/cli/connected_components.py | 35 +++++++++++++++++++------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/kgtk/cli/connected_components.py b/kgtk/cli/connected_components.py index 8b6f3e890..0df997fa4 100644 --- a/kgtk/cli/connected_components.py +++ b/kgtk/cli/connected_components.py @@ -15,20 +15,21 @@ def add_arguments(parser): Args: parser (argparse.ArgumentParser) """ - parser.add_argument('-i','--inp',action="store", type=str, dest="filename", metavar='filename', help='filename here') - parser.add_argument('-o', '--out', action='store', type=str, dest='output', help='File to output the nodes file with respective components') - parser.add_argument("--header", action="store",type=bool, dest="header_bool", help="Does the file contain a header in its first row",default=True) + parser.add_argument(action="store", type=str, dest="filename", metavar='filename', help='input filename here') + parser.add_argument('-o', '--out', action='store', type=str, dest='output', help='File to output the nodes file with respective components,if empty will be written out to standard output',default=None) + parser.add_argument("--header", action="store_true", dest="header_bool", help="Does the file contain a header in its first row") parser.add_argument("--subj", action="store", type=int, dest="sub", help='Column in which the subject is given, default 0', default=0) parser.add_argument("--obj", action="store", type=int, dest="obj", help='Column in which the subject is given, default 2', default=2) parser.add_argument("--props", action="store", type=str, dest="props",help='Properties to consider while finding connected components - comma-separated string, default all properties considered',default=None) - parser.add_argument('--directed', action='store',type=bool, dest="directed", help="Is the graph directed or not?",default=True) - parser.add_argument('--strong', action='store',type=bool, dest="strong", help="If graph is directed, strongly connected components or treat graph as undirected",default=False) + parser.add_argument('--directed', action='store_true', dest="directed", help="Is the graph directed or not?") + parser.add_argument('--strong', action='store_true', dest="strong", help="If graph is directed, strongly connected components or treat graph as undirected") -def run(filename,output,directed,header,sub,obj,props,strong): +def run(filename,output,header_bool,sub,obj,props,directed,strong): # import modules locally import csv + import sys from graph_tool import load_graph_from_csv from graph_tool.util import find_edge from graph_tool.topology import label_components @@ -36,7 +37,8 @@ def run(filename,output,directed,header,sub,obj,props,strong): from kgtk.cli_argparse import KGTKArgumentParser try: - g=load_graph_from_csv(filename,directed,skip_first=header,hashed=True,csv_options={'delimiter': '\t'},ecols=(sub,obj)) + header=['node1','label','node2'] + g=load_graph_from_csv(filename,directed,skip_first=header_bool,hashed=True,csv_options={'delimiter': '\t'},ecols=(sub,obj)) es=[] if props: properties=props.split(',') @@ -45,11 +47,16 @@ def run(filename,output,directed,header,sub,obj,props,strong): g.clear_edges() g.add_edge_list(list(set(es))) comp, hist= label_components(g,directed=strong) - 
f=open(output,'w') - wr = csv.writer(f, quoting=csv.QUOTE_NONE,delimiter="\t",escapechar="\n",quotechar='') - wr.writerow(['node','component']) - for v,c in enumerate(comp): - wr.writerow([g.vertex_properties['name'][v],c]) - f.close() + if output: + f=open(output,'w') + wr = csv.writer(f, quoting=csv.QUOTE_NONE,delimiter="\t",escapechar="\n",quotechar='') + wr.writerow(header) + for v,c in enumerate(comp): + wr.writerow([g.vertex_properties['name'][v],'connected_component',c]) + f.close() + else: + sys.stdout.write('%s\t%s\t%s\n' % ('node1', 'label', 'node2')) + for v,c in enumerate(comp): + sys.stdout.write('%s\t%s\t%s\n' % (g.vertex_properties['name'][v], 'connected_component', str(c))) except: - raise KGTKException + raise KGTKException \ No newline at end of file From d737e7c20dfc000226770402bb12b83b7e69265d Mon Sep 17 00:00:00 2001 From: Naren Date: Sun, 10 May 2020 21:25:56 -0700 Subject: [PATCH 133/278] Add reachability command and minor bug fixes --- kgtk/cli/connected_components.py | 22 +++++-- kgtk/cli/import_wikidata.py | 38 ++++++++--- kgtk/cli/reachable_nodes.py | 109 +++++++++++++++++++++++++++++++ 3 files changed, 153 insertions(+), 16 deletions(-) create mode 100644 kgtk/cli/reachable_nodes.py diff --git a/kgtk/cli/connected_components.py b/kgtk/cli/connected_components.py index 0df997fa4..630796254 100644 --- a/kgtk/cli/connected_components.py +++ b/kgtk/cli/connected_components.py @@ -16,17 +16,18 @@ def add_arguments(parser): parser (argparse.ArgumentParser) """ parser.add_argument(action="store", type=str, dest="filename", metavar='filename', help='input filename here') - parser.add_argument('-o', '--out', action='store', type=str, dest='output', help='File to output the nodes file with respective components,if empty will be written out to standard output',default=None) - parser.add_argument("--header", action="store_true", dest="header_bool", help="Does the file contain a header in its first row") + parser.add_argument('-o', '--out', action='store', type=str, dest='output', help='File to output the edge file with respective components,if empty will be written out to standard output',default=None) + parser.add_argument("--noheader", action="store_true", dest="header_bool", help="Option to specify that file does not have a header") parser.add_argument("--subj", action="store", type=int, dest="sub", help='Column in which the subject is given, default 0', default=0) parser.add_argument("--obj", action="store", type=int, dest="obj", help='Column in which the subject is given, default 2', default=2) + parser.add_argument("--pred",action="store" ,type=int, dest="pred",help='Column in which predicate is given, default 1',default=1) parser.add_argument("--props", action="store", type=str, dest="props",help='Properties to consider while finding connected components - comma-separated string, default all properties considered',default=None) - parser.add_argument('--directed', action='store_true', dest="directed", help="Is the graph directed or not?") + parser.add_argument('--undirected', action='store_true', dest="undirected", help="Option to specify graph as undirected?") parser.add_argument('--strong', action='store_true', dest="strong", help="If graph is directed, strongly connected components or treat graph as undirected") -def run(filename,output,header_bool,sub,obj,props,directed,strong): +def run(filename,output,header_bool,sub,obj,pred,props,undirected,strong): # import modules locally import csv import sys @@ -36,14 +37,23 @@ def 
run(filename,output,header_bool,sub,obj,props,directed,strong):
     from kgtk.exceptions import KGTKException
     from kgtk.cli_argparse import KGTKArgumentParser
+
+    def find_pred_position(sub,pred,obj):
+        if pred < sub and pred < obj:
+            return pred
+        elif (pred > sub and pred < obj) or (pred < sub and pred > obj):
+            return pred-1
+        else:
+            return pred-2
     try:
         header=['node1','label','node2']
-        g=load_graph_from_csv(filename,directed,skip_first=header_bool,hashed=True,csv_options={'delimiter': '\t'},ecols=(sub,obj))
+        label='c'+str(find_pred_position(sub,pred,obj))
+        g=load_graph_from_csv(filename,not(undirected),skip_first=not(header_bool),hashed=True,csv_options={'delimiter': '\t'},ecols=(sub,obj))
         es=[]
         if props:
             properties=props.split(',')
             for e in properties:
-                es+=(find_edge(g,g.edge_properties['c0'],e))
+                es+=(find_edge(g,g.edge_properties[label],e))
             g.clear_edges()
             g.add_edge_list(list(set(es)))
         comp, hist= label_components(g,directed=strong)
diff --git a/kgtk/cli/import_wikidata.py b/kgtk/cli/import_wikidata.py
index 0019fef32..aadb01493 100644
--- a/kgtk/cli/import_wikidata.py
+++ b/kgtk/cli/import_wikidata.py
@@ -155,8 +155,8 @@ def process(self,line,node_file,edge_file,qual_file,languages,doc_id):
 
         if self.parse_labels:
             labels = obj["labels"]
+            label_list=[]
             if labels:
-                label_list=[]
                 for lang in languages:
                     lang_label = labels.get(lang, None)
                     if lang_label:
@@ -171,8 +171,8 @@ def process(self,line,node_file,edge_file,qual_file,languages,doc_id):
 
         if self.parse_descr:
             descriptions = obj["descriptions"]
+            descr_list=[]
             if descriptions:
-                descr_list=[]
                 for lang in languages:
                     lang_descr = descriptions.get(lang, None)
                     if lang_descr:
@@ -186,8 +186,8 @@ def process(self,line,node_file,edge_file,qual_file,languages,doc_id):
 
         if self.parse_aliases:
             aliases = obj["aliases"]
+            alias_list = []
             if aliases:
-                alias_list = []
                 for lang in languages:
                     lang_aliases = aliases.get(lang, None)
                     if lang_aliases:
@@ -235,6 +235,8 @@ def process(self,line,node_file,edge_file,qual_file,languages,doc_id):
                         value = ''
                         mag = ''
                         unit = ''
+                        date=''
+                        item=''
                         lower = ''
                         upper = ''
                         precision = ''
@@ -245,6 +247,7 @@ def process(self,line,node_file,edge_file,qual_file,languages,doc_id):
                             if typ.startswith('wikibase'):
                                 enttype = val.get('entity-type')
                                 value = val.get('id', '')
+                                item=value
                             elif typ == 'quantity':
                                 value = val['amount']
                                 mag = val['amount']
@@ -267,11 +270,15 @@ def process(self,line,node_file,edge_file,qual_file,languages,doc_id):
                                     precision = val.get('precision', '')
                                 value = '@' + lat + '/' + long
                             elif typ == 'time':
-                                mag = "^" + val['time'][1:]
+                                if val['time'][0]=='-':
+                                    pre="^-"
+                                else:
+                                    pre="^"
+                                date = pre + val['time'][1:]
                                 precision = str(val['precision'])
                                 calendar = val.get(
                                     'calendarmodel', '').split('/')[-1]
-                                value = "^" + \
+                                value = pre + \
                                     val['time'][1:] + '/' + str(val['precision'])
                             elif typ == 'monolingualtext':
                                 value = '\'' + \
@@ -286,6 +293,8 @@ def process(self,line,node_file,edge_file,qual_file,languages,doc_id):
                                 rank,
                                 mag,
                                 unit,
+                                date,
+                                item,
                                 lower,
                                 upper,
                                 lat,
@@ -304,6 +313,8 @@ def process(self,line,node_file,edge_file,qual_file,languages,doc_id):
                                     value = ''
                                     mag = ''
                                     unit = ''
+                                    date= ''
+                                    item=''
                                     lower = ''
                                     upper = ''
                                     precision = ''
@@ -323,6 +334,7 @@ def process(self,line,node_file,edge_file,qual_file,languages,doc_id):
                                             'entity-type')
                                         value = val.get(
                                             'id', '')
+                                        item=value
                                     elif typ == 'quantity':
                                         value = val['amount']
                                         mag = val['amount']
@@ -351,13 +363,17 @@ def process(self,line,node_file,edge_file,qual_file,languages,doc_id):
                                             'precision', '')
                                         value = '@' + lat + '/' + long
                                     elif typ ==
'time': - mag = "^" + \ + if val['time'][0]=='-': + pre="^-" + else: + pre="^" + date = pre + \ val['time'][1:] precision = str( val['precision']) calendar = val.get( 'calendarmodel', '').split('/')[-1] - value = "^" + \ + value = pre + \ val['time'][1:] + '/' + str(val['precision']) elif typ == 'monolingualtext': value = '\'' + \ @@ -372,6 +388,8 @@ def process(self,line,node_file,edge_file,qual_file,languages,doc_id): value, mag, unit, + date, + item, lower, upper, lat, @@ -390,11 +408,11 @@ def process(self,line,node_file,edge_file,qual_file,languages,doc_id): sitelang=link.split('wiki')[0].replace('_','-') sitelink='http://'+sitelang+'.wikipedia.org/wiki/'+sitetitle if edge_file: - erows.append([sid, qnode, 'wikipedia_sitelink', sitelink,'','','','','', + erows.append([sid, qnode, 'wikipedia_sitelink', sitelink,'','','','','','','', '','','','','']) if qual_file: tempid=sid+'-language-1' - qrows.append([tempid,sid,'language',sitelang,'','','','','','','','','']) + qrows.append([tempid,sid,'language',sitelang,'','','','','','','','','','','']) if node_file: with open(node_file+'_{}'.format(self._idx), write_mode, newline='') as myfile: @@ -442,7 +460,7 @@ def process(self,line,node_file,edge_file,qual_file,languages,doc_id): escapechar="\n", quotechar='') wr.writerow(header) - header = ['id','node1','label','node2','rank','node2;magnitude','node2;unit','node2;lower','node2;upper', + header = ['id','node1','label','node2','rank','node2;magnitude','node2;unit','node2;date','node2;item','node2;lower','node2;upper', 'node2;latitude','node2;longitude','node2;precision','node2;calendar','node2;entity-type'] if edge_file: with open(edge_file+'_header', 'w', newline='') as myfile: diff --git a/kgtk/cli/reachable_nodes.py b/kgtk/cli/reachable_nodes.py new file mode 100644 index 000000000..bfa071574 --- /dev/null +++ b/kgtk/cli/reachable_nodes.py @@ -0,0 +1,109 @@ +""" +Find reachable nodes given a set of root nodes and properties +""" + + +def parser(): + return { + 'help': 'Find reachable nodes in a graph.' 
+    }
+
+
+def add_arguments(parser):
+    """
+    Parse arguments
+    Args:
+        parser (argparse.ArgumentParser)
+    """
+    parser.add_argument(action="store", type=str, dest="filename", metavar='filename', help='input filename here')
+    parser.add_argument('--root',action='store',dest='root',help='File containing the set of root nodes')
+    parser.add_argument('-o', '--out', action='store', type=str, dest='output', help='File to output the reachable nodes,if empty will be written out to standard output',default=None)
+    parser.add_argument("--noheader", action="store_true", dest="header_bool", help="Option to specify that file does not have a header")
+    parser.add_argument("--subj", action="store", type=int, dest="sub", help='Column in which the subject is given, default 0', default=0)
+    parser.add_argument("--obj", action="store", type=int, dest="obj", help='Column in which the subject is given, default 2', default=2)
+    parser.add_argument("--pred",action="store" ,type=int, dest="pred",help='Column in which predicate is given, default 1',default=1)
+    parser.add_argument("--props", action="store", type=str, dest="props",help='Properties to consider while finding reachable nodes - comma-separated string, default all properties considered',default=None)
+    parser.add_argument('--undirected', action='store_true', dest="undirected", help="Option to specify graph as undirected?")
+
+
+def run(filename,root,output,header_bool,sub,obj,pred,props,undirected):
+    import sys
+    import csv
+    from graph_tool.search import dfs_iterator
+    from graph_tool import load_graph_from_csv
+    from graph_tool.util import find_edge
+    from kgtk.exceptions import KGTKException
+    from kgtk.cli_argparse import KGTKArgumentParser
+
+    def find_pred_position(sub,pred,obj):
+        if pred < sub and pred < obj:
+            return pred
+        elif (pred > sub and pred < obj) or (pred < sub and pred > obj):
+            return pred-1
+        else:
+            return pred-2
+
+    def get_edges_by_edge_prop(g, p, v):
+        return find_edge(g, prop=g.properties[('e', p)], match=v)
+
+    label='c'+str(find_pred_position(sub,pred,obj))
+    header=['node1','label','node2']
+    root_list=[]
+    property_list=[]
+
+    tsv_file = open(root)
+    read_tsv = csv.reader(tsv_file, delimiter="\t")
+
+    for row in read_tsv:
+        root_list.append(row[0])
+    tsv_file.close()
+    property_list = [item for item in props.split(',')]
+    G = load_graph_from_csv(filename,not(undirected),skip_first=not(header_bool),hashed=True,csv_options={'delimiter': '\t'},ecols=(sub,obj))
+
+    name = G.vp["name"]
+
+    index_list = []
+    for v in G.vertices():
+        if name[v] in root_list:
+            index_list.append(v)
+
+    edge_filter_set = set()
+    for prop in property_list:
+        edge_filter_set.update(get_edges_by_edge_prop(G, label,prop));
+    e_prop= G.new_edge_property("bool")
+
+    v_prop= G.new_vertex_property("bool")
+    for e in G.edges():
+        if e in edge_filter_set:
+            e_prop[e] = True
+            v_prop[e.source()] = True
+            v_prop[e.target()] = True
+        else:
+            e_prop[e] = False
+            if(v_prop[e.source()] is None):
+                v_prop[e.source()] = False
+            if(v_prop[e.target()] is None):
+                v_prop[e.target()] = False
+    G.set_edge_filter(e_prop)
+    G.set_vertex_filter(v_prop)
+
+
+    if output:
+        f=open(output,'w')
+        tsv_writer = csv.writer(f, quoting=csv.QUOTE_NONE,delimiter="\t",escapechar="\n",quotechar='')
+        if index_list == []:
+            print("No root nodes found in the graph")
+        else:
+            tsv_writer.writerow(header)
+            for index in index_list:
+                for e in dfs_iterator(G, G.vertex(index)):
+                    tsv_writer.writerow([name[index], 'reachable', name[e.target()]])
+        f.close()
+    else:
+        if index_list == []:
+            print("No root nodes
found in the graph") + else: + sys.stdout.write('%s\t%s\t%s\n' % ('node1', 'label', 'node2')) + for index in index_list: + for e in dfs_iterator(G, G.vertex(index)): + sys.stdout.write('%s\t%s\t%s\n' % (name[index], 'reachable', name[e.target()])) \ No newline at end of file From 5d42b921d6e1685a88c0fe1e5206a52c27be6c4b Mon Sep 17 00:00:00 2001 From: Naren Date: Mon, 11 May 2020 03:31:35 -0700 Subject: [PATCH 134/278] improved performance of reachable script --- kgtk/cli/reachable_nodes.py | 58 ++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/kgtk/cli/reachable_nodes.py b/kgtk/cli/reachable_nodes.py index bfa071574..fa988ae5b 100644 --- a/kgtk/cli/reachable_nodes.py +++ b/kgtk/cli/reachable_nodes.py @@ -16,25 +16,31 @@ def add_arguments(parser): parser (argparse.ArgumentParser) """ parser.add_argument(action="store", type=str, dest="filename", metavar='filename', help='input filename here') - parser.add_argument('--root',action='store',dest='root',help='File containing the set of root nodes') + parser.add_argument('--root',action='store',dest='root',help='Set of root nodes to use, comma-separated string',default=None) + parser.add_argument('--rootfile',action='store',dest='rootfile',help='Option to specify a file containing the set of root nodes',default=None) + parser.add_argument('--rootfilecolumn',action='store',type=int,dest='rootfilecolumn',help='Option to specify column of roots file to use, default 0',default=0) + parser.add_argument('--norootheader',action='store_true',dest='root_header_bool',help='Option to specify that root file has no header') parser.add_argument('-o', '--out', action='store', type=str, dest='output', help='File to output the reachable nodes,if empty will be written out to standard output',default=None) parser.add_argument("--noheader", action="store_true", dest="header_bool", help="Option to specify that file does not have a header") parser.add_argument("--subj", action="store", type=int, dest="sub", help='Column in which the subject is given, default 0', default=0) parser.add_argument("--obj", action="store", type=int, dest="obj", help='Column in which the subject is given, default 2', default=2) parser.add_argument("--pred",action="store" ,type=int, dest="pred",help='Column in which predicate is given, default 1',default=1) - parser.add_argument("--props", action="store", type=str, dest="props",help='Properties to consider while finding reachable nodes - comma-separated string, default all properties considered',default=None) + parser.add_argument("--props", action="store", type=str, dest="props",help='Properties to consider while finding reachable nodes - comma-separated string',default=None) parser.add_argument('--undirected', action='store_true', dest="undirected", help="Option to specify graph as undirected?") -def run(filename,root,output,header_bool,sub,obj,pred,props,undirected): +def run(filename,root,rootfile,rootfilecolumn,root_header_bool,output,header_bool,sub,obj,pred,props,undirected): import sys import csv + import time from graph_tool.search import dfs_iterator from graph_tool import load_graph_from_csv from graph_tool.util import find_edge from kgtk.exceptions import KGTKException from kgtk.cli_argparse import KGTKArgumentParser + + #Graph-tool names columns that are not subject or object c0, c1... 
This function finds the number that graph tool assigned to the predicate column
     def find_pred_position(sub,pred,obj):
         if pred < sub and pred < obj:
             return pred
         elif (pred > sub and pred < obj) or (pred < sub and pred > obj):
             return pred-1
         else:
             return pred-2
 
     def get_edges_by_edge_prop(g, p, v):
         return find_edge(g, prop=g.properties[('e', p)], match=v)
 
+
     label='c'+str(find_pred_position(sub,pred,obj))
     header=['node1','label','node2']
+    root_set=set()
     root_list=[]
     property_list=[]
-
-    tsv_file = open(root)
-    read_tsv = csv.reader(tsv_file, delimiter="\t")
-
-    for row in read_tsv:
-        root_list.append(row[0])
-    tsv_file.close()
+    if (rootfile):
+        tsv_file = open(rootfile)
+        read_tsv = csv.reader(tsv_file, delimiter="\t")
+        first_row=True
+        for row in read_tsv:
+            if first_row and not root_header_bool:
+                first_row=False
+                continue
+            root_set.add(row[rootfilecolumn])
+        tsv_file.close()
+    if (root):
+        for r in root.split(','):
+            root_set.add(r)
+    root_list=list(root_set)
     property_list = [item for item in props.split(',')]
     G = load_graph_from_csv(filename,not(undirected),skip_first=not(header_bool),hashed=True,csv_options={'delimiter': '\t'},ecols=(sub,obj))
@@ -70,24 +85,9 @@ def get_edges_by_edge_prop(g, p, v):
     edge_filter_set = set()
     for prop in property_list:
         edge_filter_set.update(get_edges_by_edge_prop(G, label,prop));
-    e_prop= G.new_edge_property("bool")
-
-    v_prop= G.new_vertex_property("bool")
-    for e in G.edges():
-        if e in edge_filter_set:
-            e_prop[e] = True
-            v_prop[e.source()] = True
-            v_prop[e.target()] = True
-        else:
-            e_prop[e] = False
-            if(v_prop[e.source()] is None):
-                v_prop[e.source()] = False
-            if(v_prop[e.target()] is None):
-                v_prop[e.target()] = False
-    G.set_edge_filter(e_prop)
-    G.set_vertex_filter(v_prop)
-
-
+
+    G.clear_edges()
+    G.add_edge_list(list(edge_filter_set))
     if output:
         f=open(output,'w')
         tsv_writer = csv.writer(f, quoting=csv.QUOTE_NONE,delimiter="\t",escapechar="\n",quotechar='')
@@ -106,4 +106,4 @@ def get_edges_by_edge_prop(g, p, v):
             sys.stdout.write('%s\t%s\t%s\n' % ('node1', 'label', 'node2'))
             for index in index_list:
                 for e in dfs_iterator(G, G.vertex(index)):
-                    sys.stdout.write('%s\t%s\t%s\n' % (name[index], 'reachable', name[e.target()]))
\ No newline at end of file
+                    sys.stdout.write('%s\t%s\t%s\n' % (name[index], 'reachable', name[e.target()]))
From 97bf304db7a2920deb010fcaf63aca90a250be95 Mon Sep 17 00:00:00 2001
From: Craig Milo Rogers
Date: Mon, 11 May 2020 10:29:32 -0700
Subject: [PATCH 135/278] Conversion to KgtkReaderOptions (incomplete).

---
 kgtk/cli/validate.py  | 141 +--------
 kgtk/io/edgereader.py | 114 ++------
 kgtk/io/kgtkreader.py | 652 ++++++++++++++++++------------------------
 kgtk/io/nodereader.py | 111 ++-----
 4 files changed, 349 insertions(+), 669 deletions(-)

diff --git a/kgtk/cli/validate.py b/kgtk/cli/validate.py
index ccf442bc0..2b7c1a5dc 100644
--- a/kgtk/cli/validate.py
+++ b/kgtk/cli/validate.py
@@ -16,7 +16,7 @@
 import typing
 
 from kgtk.kgtkformat import KgtkFormat
-from kgtk.io.kgtkreader import KgtkReader
+from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
 from kgtk.utils.enumnameaction import EnumNameAction
 from kgtk.utils.validationaction import ValidationAction
 from kgtk.value.kgtkvalueoptions import KgtkValueOptions
@@ -35,124 +35,18 @@ def add_arguments(parser):
     """
     parser.add_argument(      "kgtk_files", nargs="*", help="The KGTK file(s) to validate.
May be omitted or '-' for stdin.", type=Path) - parser.add_argument( "--blank-id-line-action", dest="blank_id_line_action", - help="The action to take when a blank id field is detected.", - type=ValidationAction, action=EnumNameAction, default=None) - - parser.add_argument( "--blank-node1-line-action", dest="blank_node1_line_action", - help="The action to take when a blank node1 field is detected.", - type=ValidationAction, action=EnumNameAction, default=None) - - parser.add_argument( "--blank-node2-line-action", dest="blank_node2_line_action", - help="The action to take when a blank node2 field is detected.", - type=ValidationAction, action=EnumNameAction, default=None) - - parser.add_argument( "--blank-required-field-line-action", dest="blank_line_action", - help="The action to take when a line with a blank node1, node2, or id field (per mode) is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.COMPLAIN) - - parser.add_argument( "--column-separator", dest="column_separator", - help="Column separator.", type=str, default=KgtkReader.COLUMN_SEPARATOR) - - parser.add_argument( "--comment-line-action", dest="comment_line_action", - help="The action to take when a comment line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.COMPLAIN) - - parser.add_argument( "--compression-type", dest="compression_type", - help="Specify the input file compression type, otherwise use the extension.") - - parser.add_argument( "--empty-line-action", dest="empty_line_action", - help="The action to take when an empty line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.COMPLAIN) - - errors_to = parser.add_mutually_exclusive_group() - errors_to.add_argument( "--errors-to-stdout", dest="errors_to_stdout", - help="Send errors to stdout instead of stderr (default)", action="store_true") - errors_to.add_argument( "--errors-to-stderr", dest="errors_to_stderr", - help="Send errors to stderr instead of stdout", action="store_true") - - parser.add_argument( "--error-limit", dest="error_limit", - help="The maximum number of errors to report before failing", type=int, default=KgtkReader.ERROR_LIMIT_DEFAULT) - - parser.add_argument( "--fill-short-lines", dest="fill_short_lines", - help="Fill missing trailing columns in short lines with empty values.", action='store_true') - - parser.add_argument( "--force-column-names", dest="force_column_names", help="Force the column names.", nargs='+') - - parser.add_argument( "--gzip-in-parallel", dest="gzip_in_parallel", help="Execute gzip in parallel.", action='store_true') - - parser.add_argument( "--gzip-queue-size", dest="gzip_queue_size", - help="Queue size for parallel gzip.", type=int, default=KgtkReader.GZIP_QUEUE_SIZE_DEFAULT) - - parser.add_argument( "--header-error-action", dest="header_error_action", - help="The action to take when a header error is detected Only ERROR or EXIT are supported.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXIT) parser.add_argument( "--header-only", dest="header_only", help="Process the only the header of the input file.", action="store_true") - parser.add_argument( "--invalid-value-action", dest="invalid_value_action", - help="The action to take when a data cell value is invalid.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.REPORT) - - parser.add_argument( "--long-line-action", dest="long_line_action", - help="The action to take when a long line is detected.", - 
type=ValidationAction, action=EnumNameAction, default=ValidationAction.COMPLAIN) - - parser.add_argument( "--mode", dest="mode", - help="Determine the KGTK input file mode.", type=KgtkReader.Mode, action=EnumNameAction, default=KgtkReader.Mode.AUTO) - - parser.add_argument( "--short-line-action", dest="short_line_action", - help="The action to take whe a short line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.COMPLAIN) - - parser.add_argument( "--skip-first-record", dest="skip_first_record", help="Skip the first record when forcing column names.", action='store_true') - - parser.add_argument( "--truncate-long-lines", dest="truncate_long_lines", - help="Remove excess trailing columns in long lines.", action='store_true') - - parser.add_argument( "--unsafe-column-name-action", dest="unsafe_column_name_action", - help="The action to take when a column name is unsafe.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.REPORT) - - parser.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true') - - parser.add_argument( "--very-verbose", dest="very_verbose", help="Print additional progress messages.", action='store_true') - - parser.add_argument( "--whitespace-line-action", dest="whitespace_line_action", - help="The action to take when a whitespace line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - # Note: Any arguments described by KgtkValueOptions.add_arguments(...) - # need to be included in the arguments to run(...), below. + KgtkReader.add_debug_arguments(parser) + KgtkReaderOptions.add_arguments(parser, mode_options=True) KgtkValueOptions.add_arguments(parser) def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], - force_column_names: typing.Optional[typing.List[str]] = None, - skip_first_record: bool = False, - fill_short_lines: bool = False, - truncate_long_lines: bool = False, errors_to_stdout: bool = False, errors_to_stderr: bool = False, - error_limit: int = KgtkReader.ERROR_LIMIT_DEFAULT, - empty_line_action: ValidationAction = ValidationAction.COMPLAIN, - comment_line_action: ValidationAction = ValidationAction.COMPLAIN, - whitespace_line_action: ValidationAction = ValidationAction.COMPLAIN, - blank_line_action: ValidationAction = ValidationAction.COMPLAIN, - blank_node1_line_action: typing.Optional[ValidationAction] = None, - blank_node2_line_action: typing.Optional[ValidationAction] = None, - blank_id_line_action: typing.Optional[ValidationAction] = None, - short_line_action: ValidationAction = ValidationAction.COMPLAIN, - long_line_action: ValidationAction = ValidationAction.COMPLAIN, - invalid_value_action: ValidationAction = ValidationAction.REPORT, - header_error_action: ValidationAction = ValidationAction.EXIT, - unsafe_column_name_action: ValidationAction = ValidationAction.REPORT, - compression_type: typing.Optional[str] = None, - gzip_in_parallel: bool = False, - gzip_queue_size: int = KgtkReader.GZIP_QUEUE_SIZE_DEFAULT, - column_separator: str = KgtkFormat.COLUMN_SEPARATOR, - mode: KgtkReader.Mode = KgtkReader.Mode.AUTO, header_only: bool = False, verbose: bool = False, very_verbose: bool = False, @@ -167,7 +61,8 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], # Select where to send error messages, defaulting to stderr. error_file: typing.TextIO = sys.stderr if errors_to_stderr else sys.stdout - # Build the value parsing option structure. 
+ # Build the option structures. + reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs) value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs) try: @@ -181,31 +76,11 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], print ("Validating from stdin", file=error_file, flush=True) kr: KgtkReader = KgtkReader.open(kgtk_file, - force_column_names=force_column_names, - skip_first_record=skip_first_record, - fill_short_lines=fill_short_lines, - truncate_long_lines=truncate_long_lines, error_file=error_file, - error_limit=error_limit, - empty_line_action=empty_line_action, - comment_line_action=comment_line_action, - whitespace_line_action=whitespace_line_action, - blank_line_action=blank_line_action, - blank_node1_line_action=blank_node1_line_action, - blank_node2_line_action=blank_node2_line_action, - blank_id_line_action=blank_id_line_action, - short_line_action=short_line_action, - long_line_action=long_line_action, - invalid_value_action=invalid_value_action, - header_error_action=header_error_action, - unsafe_column_name_action=unsafe_column_name_action, - compression_type=compression_type, + options=reader_options, value_options=value_options, - gzip_in_parallel=gzip_in_parallel, - gzip_queue_size=gzip_queue_size, - column_separator=column_separator, - mode=mode, - verbose=verbose, very_verbose=very_verbose) + verbose=verbose, + very_verbose=very_verbose) if header_only: kr.close() diff --git a/kgtk/io/edgereader.py b/kgtk/io/edgereader.py index 1f16b3961..837085269 100644 --- a/kgtk/io/edgereader.py +++ b/kgtk/io/edgereader.py @@ -10,7 +10,7 @@ import sys import typing -from kgtk.io.kgtkreader import KgtkReader +from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions from kgtk.utils.closableiter import ClosableIter from kgtk.utils.enumnameaction import EnumNameAction from kgtk.utils.validationaction import ValidationAction @@ -22,57 +22,32 @@ class EdgeReader(KgtkReader): @classmethod def open_edge_file(cls, file_path: typing.Optional[Path], - force_column_names: typing.Optional[typing.List[str]] = None, # - skip_first_record: bool = False, - fill_short_lines: bool = False, - truncate_long_lines: bool = False, error_file: typing.TextIO = sys.stderr, - error_limit: int = KgtkReader.ERROR_LIMIT_DEFAULT, - empty_line_action: ValidationAction = ValidationAction.EXCLUDE, - comment_line_action: ValidationAction = ValidationAction.EXCLUDE, - whitespace_line_action: ValidationAction = ValidationAction.EXCLUDE, - blank_node1_line_action: ValidationAction = ValidationAction.EXCLUDE, - blank_node2_line_action: ValidationAction = ValidationAction.EXCLUDE, - short_line_action: ValidationAction = ValidationAction.EXCLUDE, - long_line_action: ValidationAction = ValidationAction.EXCLUDE, - invalid_value_action: ValidationAction = ValidationAction.REPORT, - header_error_action: ValidationAction = ValidationAction.EXIT, - unsafe_column_name_action: ValidationAction = ValidationAction.REPORT, + options: typing.Optional[KgtkReaderOptions] = None, value_options: typing.Optional[KgtkValueOptions] = None, - compression_type: typing.Optional[str] = None, - gzip_in_parallel: bool = False, - gzip_queue_size: int = KgtkReader.GZIP_QUEUE_SIZE_DEFAULT, - column_separator: str = KgtkReader.COLUMN_SEPARATOR, verbose: bool = False, very_verbose: bool = False)->"EdgeReader": - source: ClosableIter[str] = cls._openfile(file_path, - compression_type=compression_type, - gzip_in_parallel=gzip_in_parallel, - gzip_queue_size=gzip_queue_size, - error_file=error_file, 
- verbose=verbose) + # Supply the default reader and value options: + (options, value_options) = cls._default_options(options, value_options) + + source: ClosableIter[str] = cls._openfile(file_path, options=options, error_file=error_file, verbose=verbose) # Read the edge file header and split it into column names. header: str column_names: typing.List[str] - (header, column_names) = cls._build_column_names(source, - force_column_names=force_column_names, - skip_first_record=skip_first_record, - column_separator=column_separator, - error_file=error_file, - verbose=verbose) + (header, column_names) = cls._build_column_names(source, options=options, error_file=error_file, verbose=verbose) # Check for unsafe column names. cls.check_column_names(column_names, header_line=header, - error_action=unsafe_column_name_action, + error_action=options.unsafe_column_name_action, error_file=error_file) # Build a map from column name to column index. column_name_map: typing.Mapping[str, int] = cls.build_column_name_map(column_names, header_line=header, - error_action=header_error_action, + error_action=options.header_error_action, error_file=error_file) # Get the indices of the required columns. node1_column_idx: int @@ -80,7 +55,7 @@ def open_edge_file(cls, label_column_idx: int (node1_column_idx, node2_column_idx, label_column_idx) = cls.required_edge_columns(column_name_map, header_line=header, - error_action=header_error_action, + error_action=options.header_error_action, error_file=error_file) if verbose: @@ -89,53 +64,35 @@ def open_edge_file(cls, return cls(file_path=file_path, source=source, - column_separator=column_separator, column_names=column_names, column_name_map=column_name_map, column_count=len(column_names), node1_column_idx=node1_column_idx, node2_column_idx=node2_column_idx, label_column_idx=label_column_idx, - force_column_names=force_column_names, - skip_first_record=skip_first_record, - fill_short_lines=fill_short_lines, - truncate_long_lines=truncate_long_lines, error_file=error_file, - error_limit=error_limit, - empty_line_action=empty_line_action, - comment_line_action=comment_line_action, - whitespace_line_action=whitespace_line_action, - blank_node1_line_action=blank_node1_line_action, - blank_node2_line_action=blank_node2_line_action, - short_line_action=short_line_action, - long_line_action=long_line_action, - invalid_value_action=invalid_value_action, - header_error_action=header_error_action, - unsafe_column_name_action=unsafe_column_name_action, + options=options, value_options=value_options, - compression_type=compression_type, - gzip_in_parallel=gzip_in_parallel, - gzip_queue_size=gzip_queue_size, is_edge_file=True, is_node_file=False, verbose=verbose, very_verbose=very_verbose, ) - def _ignore_if_blank_fields(self, values: typing.List[str], line: str)->bool: + def _ignore_if_blank_required_fields(self, values: typing.List[str], line: str)->bool: # Ignore line_action with blank node1 fields. This code comes after # filling missing trailing columns, although it could be reworked # to come first. 
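
For reference, a hedged miniature of the consolidated blank-required-field test that the hunk below rewrites (hypothetical standalone form; the real method dispatches the result through a ValidationAction):

import typing

def blank_required_field(values: typing.List[str], idx: int) -> bool:
    # True when the required column exists on this line but is empty or whitespace.
    return 0 <= idx < len(values) and (len(values[idx]) == 0 or values[idx].isspace())

# e.g. blank_required_field(["Q1", "", "Q42"], 1) -> True  (blank node2)
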
- if self.blank_node1_line_action != ValidationAction.PASS and self.node1_column_idx >= 0 and len(values) > self.node1_column_idx: + if self.options.blank_required_field_line_action != ValidationAction.PASS and self.node1_column_idx >= 0 and len(values) > self.node1_column_idx: node1_value: str = values[self.node1_column_idx] if len(node1_value) == 0 or node1_value.isspace(): - return self.exclude_line(self.blank_node1_line_action, "node1 is blank", line) + return self.exclude_line(self.options.blank_required_field_line_action, "node1 is blank", line) # Ignore lines with blank node2 fields: - if self.blank_node2_line_action != ValidationAction.PASS and self.node2_column_idx >= 0 and len(values) > self.node2_column_idx: + if self.options.blank_required_field_line_action != ValidationAction.PASS and self.node2_column_idx >= 0 and len(values) > self.node2_column_idx: node2_value: str = values[self.node2_column_idx] if len(node2_value) == 0 or node2_value.isspace(): - return self.exclude_line(self.blank_node2_line_action, "node2 is blank", line) + return self.exclude_line(self.options.blank_required_field_line_action, "node2 is blank", line) return False # Do not ignore this line def _skip_reserved_fields(self, column_name)->bool: @@ -152,40 +109,25 @@ def main(): Test the KGTK edge file reader. """ parser = ArgumentParser() - KgtkReader.add_operation_arguments(parser) - KgtkReader.add_file_arguments(parser, edge_options=True) + parser.add_argument(dest="kgtk_file", help="The KGTK edge file to read", type=Path, nargs="?") + KgtkReader.add_debug_arguments(parser) + KgtkReaderOptions.add_arguments(parser) KgtkValueOptions.add_arguments(parser) args = parser.parse_args() error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr - # Build the value parsing option structure. + # Build the option structures. 
+    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args, mode=KgtkReaderMode.EDGE)
     value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)
 
-    er: EdgeReader = EdgeReader.open(args.kgtk_file,
-                                     force_column_names=args.force_column_names,
-                                     skip_first_record=args.skip_first_record,
-                                     fill_short_lines=args.fill_short_lines,
-                                     truncate_long_lines=args.truncate_long_lines,
-                                     error_file=error_file,
-                                     error_limit=args.error_limit,
-                                     empty_line_action=args.empty_line_action,
-                                     comment_line_action=args.comment_line_action,
-                                     whitespace_line_action=args.whitespace_line_action,
-                                     blank_node1_line_action=args.blank_node1_line_action,
-                                     blank_node2_line_action=args.blank_node2_line_action,
-                                     short_line_action=args.short_line_action,
-                                     long_line_action=args.long_line_action,
-                                     invalid_value_action=args.invalid_value_action,
-                                     header_error_action=args.header_error_action,
-                                     unsafe_column_name_action=args.unsafe_column_name_action,
-                                     value_options=value_options,
-                                     compression_type=args.compression_type,
-                                     gzip_in_parallel=args.gzip_in_parallel,
-                                     gzip_queue_size=args.gzip_queue_size,
-                                     column_separator=args.column_separator,
-                                     mode=KgtkReader.Mode.EDGE,
-                                     verbose=args.verbose, very_verbose=args.very_verbose)
+    # Force the edge mode:
+    er: EdgeReader = EdgeReader.open_edge_file(args.kgtk_file,
+                                               error_file=error_file,
+                                               options=reader_options,
+                                               value_options=value_options,
+                                               verbose=args.verbose, very_verbose=args.very_verbose)
 
     line_count: int = 0
     row: typing.List[str]
diff --git a/kgtk/io/kgtkreader.py b/kgtk/io/kgtkreader.py
index ee6c91571..7a358d53b 100644
--- a/kgtk/io/kgtkreader.py
+++ b/kgtk/io/kgtkreader.py
@@ -16,7 +16,7 @@
 
 """
 
-from argparse import ArgumentParser, _ArgumentGroup
+from argparse import ArgumentParser, _ArgumentGroup, Namespace
 import attr
 import bz2
 from enum import Enum
@@ -37,25 +37,21 @@
 from kgtk.value.kgtkvalue import KgtkValue
 from kgtk.value.kgtkvalueoptions import KgtkValueOptions, DEFAULT_KGTK_VALUE_OPTIONS
 
-@attr.s(slots=True, frozen=False)
-class KgtkReader(KgtkBase, ClosableIter[typing.List[str]]):
+class KgtkReaderMode(Enum):
+    """
+    There are four file reading modes:
+    """
+    NONE = 0 # Enforce neither edge nor node file required columns
+    EDGE = 1 # Enforce edge file required columns
+    NODE = 2 # Enforce node file required columns
+    AUTO = 3 # Automatically decide whether to enforce edge or node file required columns
+
+@attr.s(slots=True, frozen=True)
+class KgtkReaderOptions():
     ERROR_LIMIT_DEFAULT: int = 1000
     GZIP_QUEUE_SIZE_DEFAULT: int = GunzipProcess.GZIP_QUEUE_SIZE_DEFAULT
 
-    file_path: typing.Optional[Path] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(Path)))
-    source: ClosableIter[str] = attr.ib() # Todo: validate
-    column_names: typing.List[str] = attr.ib(validator=attr.validators.deep_iterable(member_validator=attr.validators.instance_of(str),
-                                                                                     iterable_validator=attr.validators.instance_of(list)))
-    column_name_map: typing.Mapping[str, int] = attr.ib(validator=attr.validators.deep_mapping(key_validator=attr.validators.instance_of(str),
-                                                                                               value_validator=attr.validators.instance_of(int)))
-
-    # For convenience, the count of columns. This is the same as len(column_names).
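
The options class introduced above leans on the attrs package. A minimal sketch of the same pattern under stated assumptions (illustrative names only, nowhere near the full option set):

import attr
from enum import Enum

class MiniMode(Enum):
    NONE = 0
    EDGE = 1

@attr.s(slots=True, frozen=True)
class MiniReaderOptions:
    # Validators reject wrongly-typed values at construction time.
    mode: MiniMode = attr.ib(validator=attr.validators.instance_of(MiniMode), default=MiniMode.NONE)
    error_limit: int = attr.ib(validator=attr.validators.instance_of(int), default=1000)

opts = MiniReaderOptions(mode=MiniMode.EDGE)
# opts.error_limit = 5  # would raise FrozenInstanceError: the instance is immutable
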
- column_count: int = attr.ib(validator=attr.validators.instance_of(int)) - - data_lines_read: int = attr.ib(validator=attr.validators.instance_of(int), default=0) - data_lines_passed: int = attr.ib(validator=attr.validators.instance_of(int), default=0) - data_lines_ignored: int = attr.ib(validator=attr.validators.instance_of(int), default=0) - data_errors_reported: int = attr.ib(validator=attr.validators.instance_of(int), default=0) + mode: KgtkReaderMode = attr.ib(validator=attr.validators.instance_of(KgtkReaderMode), default=KgtkReaderMode.AUTO) # The column separator is normally tab. column_separator: str = attr.ib(validator=attr.validators.instance_of(str), default=KgtkFormat.COLUMN_SEPARATOR) @@ -66,25 +62,16 @@ class KgtkReader(KgtkBase, ClosableIter[typing.List[str]]): default=None) skip_first_record: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) - # The index of the mandatory columns. -1 means missing: - node1_column_idx: int = attr.ib(validator=attr.validators.instance_of(int), default=-1) # edge file - node2_column_idx: int = attr.ib(validator=attr.validators.instance_of(int), default=-1) # edge file - label_column_idx: int = attr.ib(validator=attr.validators.instance_of(int), default=-1) # edge file - id_column_idx: int = attr.ib(validator=attr.validators.instance_of(int), default=-1) # node file - # How do we handle errors? - error_file: typing.TextIO = attr.ib(default=sys.stderr) - error_limit: int = attr.ib(validator=attr.validators.instance_of(int), default=ERROR_LIMIT_DEFAULT) # >0 ==> limit error reports + error_limit: int = attr.ib(validator=attr.validators.instance_of(int), default=KgtkReaderOptions.ERROR_LIMIT_DEFAULT) # >0 ==> limit error reports # Ignore empty lines, comments, and all whitespace lines, etc.? empty_line_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.EXCLUDE) comment_line_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.EXCLUDE) whitespace_line_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.EXCLUDE) - # Ignore records with values in certain fields: - blank_node1_line_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.PASS) # EXCLUDE on edge file - blank_node2_line_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.PASS) # EXCLUDE on edge file - blank_id_line_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.PASS) # EXCLUDE on node file + # Ignore records with empty values in certain fields: + blank_required_field_line_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.EXCLUDE) # Ignore records with too many or too few fields? short_line_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.EXCLUDE) @@ -96,7 +83,6 @@ class KgtkReader(KgtkBase, ClosableIter[typing.List[str]]): # Validate data cell values? 
 
     # Validate data cell values?
     invalid_value_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.REPORT)
-    value_options: typing.Optional[KgtkValueOptions] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(KgtkValueOptions)), default=None)
 
     # Repair records with too many or too few fields?
     fill_short_lines: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False)
@@ -107,92 +93,252 @@
     gzip_in_parallel: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False)
     gzip_queue_size: int = attr.ib(validator=attr.validators.instance_of(int), default=GZIP_QUEUE_SIZE_DEFAULT)
 
+    @classmethod
+    def add_arguments(cls,
+                      parser: ArgumentParser,
+                      mode_options: bool = False,
+                      who: str = ""):
+        prefix1: str = "--" if len(who) == 0 else "--" + who + "-"
+        prefix2: str = "" if len(who) == 0 else who + "_"
+        prefix3: str = "" if len(who) == 0 else who + ": "
+        prefix4: str = "" if len(who) == 0 else who + " file "
+
+        fgroup: _ArgumentGroup = parser.add_argument_group(prefix3 + "File options",
+                                                           "Options affecting " + prefix4 + "processing")
+        fgroup.add_argument(prefix1 + "column-separator",
+                            dest=prefix2 + "column_separator",
+                            help=prefix3 + "Column separator.", type=str, default=KgtkFormat.COLUMN_SEPARATOR)
+
+        fgroup.add_argument(prefix1 + "compression-type",
+                            dest=prefix2 + "compression_type", help=prefix3 + "Specify the compression type.")
+
+        fgroup.add_argument(prefix1 + "error-limit",
+                            dest=prefix2 + "error_limit",
+                            help=prefix3 + "The maximum number of errors to report before failing", type=int, default=cls.ERROR_LIMIT_DEFAULT)
+
+        fgroup.add_argument(prefix1 + "gzip-in-parallel",
+                            dest=prefix2 + "gzip_in_parallel", help=prefix3 + "Execute gzip in parallel.", action='store_true')
+
+        fgroup.add_argument(prefix1 + "gzip-queue-size",
+                            dest=prefix2 + "gzip_queue_size",
+                            help=prefix3 + "Queue size for parallel gzip.", type=int, default=cls.GZIP_QUEUE_SIZE_DEFAULT)
+
+        if mode_options:
+            fgroup.add_argument(prefix1 + "mode",
+                                dest=prefix2 + "mode",
+                                help=prefix3 + "Determine the KGTK file mode.",
+                                type=KgtkReaderMode, action=EnumNameAction, default=KgtkReaderMode.AUTO)
+
+        hgroup: _ArgumentGroup = parser.add_argument_group(prefix3 + "Header parsing", "Options affecting " + prefix4 + "header parsing")
+
+        hgroup.add_argument(prefix1 + "force-column-names",
+                            dest=prefix2 + "force_column_names",
+                            help=prefix3 + "Force the column names.", nargs='+')
+
+        hgroup.add_argument(prefix1 + "header-error-action",
+                            dest=prefix2 + "header_error_action",
+                            help=prefix3 + "The action to take when a header error is detected. Only ERROR or EXIT are supported.",
+                            type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXIT)
+
+        hgroup.add_argument(prefix1 + "skip-first-record",
+                            dest=prefix2 + "skip_first_record",
+                            help=prefix3 + "Skip the first record when forcing column names.", action='store_true')
+
+        hgroup.add_argument(prefix1 + "unsafe-column-name-action",
+                            dest=prefix2 + "unsafe_column_name_action",
+                            help=prefix3 + "The action to take when a column name is unsafe.",
+                            type=ValidationAction, action=EnumNameAction, default=ValidationAction.REPORT)
+
+        lgroup: _ArgumentGroup = parser.add_argument_group("Line parsing", "Options affecting " + prefix4 + "data line parsing")
+
+        lgroup.add_argument(prefix1 + "blank-required-field-line-action",
+                            dest=prefix2 + "blank_required_field_line_action",
+                            help=prefix3 + "The action to take when a
line with a blank node1, node2, or id field (per mode) is detected.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + + lgroup.add_argument(prefix1 + "comment-line-action", + dest=prefix2 + "comment_line_action", + help=prefix3 + "The action to take when a comment line is detected.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + + lgroup.add_argument(prefix1 + "empty-line-action", + dest=prefix2 + "empty_line_action", + help=prefix3 + "The action to take when an empty line is detected.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + + lgroup.add_argument(prefix1 + "fill-short-lines", + dest=prefix2 + "fill_short_lines", + help=prefix3 + "Fill missing trailing columns in short lines with empty values.", action='store_true') + + lgroup.add_argument(prefix1 + "invalid-value-action", + dest=prefix2 + "invalid_value_action", + help=prefix3 + "The action to take when a data cell value is invalid.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.REPORT) + + lgroup.add_argument(prefix1 + "long-line-action", + dest=prefix2 + "long_line_action", + help=prefix3 + "The action to take when a long line is detected.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + + lgroup.add_argument(prefix1 + "short-line-action", + dest=prefix2 + "short_line_action", + help=prefix3 + "The action to take when a short line is detected.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + + lgroup.add_argument(prefix1 + "truncate-long-lines", + dest=prefix2 + "truncate_long_lines", + help=prefix3 + "Remove excess trailing columns in long lines.", action='store_true') + + lgroup.add_argument(prefix1 + "whitespace-line-action", + dest=prefix2 + "whitespace_line_action", + help=prefix3 + "The action to take when a whitespace line is detected.", + type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + + @classmethod + # Build the value parsing option structure. + def from_dict(cls, + d: dict, + who: str = "", + mode: typing.Optional[KgtkReaderMode] = None, + )->'KgtkReaderOptions': + prefix: str = "" # The destination name prefix. 
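Before from_dict continues below, a sketch of the round trip that these prefixed argument names enable: two readers can share one ArgumentParser without colliding, and the from_dict/from_args pair reads the prefixed values back out. The option values here are illustrative, and the snippet assumes the classes are importable exactly as defined in this patch:

    from argparse import ArgumentParser
    from kgtk.io.kgtkreader import KgtkReaderOptions

    parser = ArgumentParser()
    # who="input" turns --error-limit into --input-error-limit (dest "input_error_limit"):
    KgtkReaderOptions.add_arguments(parser, who="input")
    KgtkReaderOptions.add_arguments(parser, who="filter")

    args = parser.parse_args(["--input-error-limit", "5"])
    input_options = KgtkReaderOptions.from_args(args, who="input")
    filter_options = KgtkReaderOptions.from_args(args, who="filter")
    print(input_options.error_limit)    # 5
    print(filter_options.error_limit)   # 1000, the default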
+        if len(who) > 0:
+            prefix = who + "_"
+
+        reader_mode: KgtkReaderMode
+        if mode is None:
+            reader_mode = d.get(prefix + "mode", KgtkReaderMode.AUTO)
+        else:
+            reader_mode = mode
+
+        return cls(
+            blank_required_field_line_action=d.get(prefix + "blank_required_field_line_action", ValidationAction.EXCLUDE),
+            column_separator=d.get(prefix + "column_separator", KgtkFormat.COLUMN_SEPARATOR),
+            comment_line_action=d.get(prefix + "comment_line_action", ValidationAction.EXCLUDE),
+            compression_type=d.get(prefix + "compression_type", None),
+            empty_line_action=d.get(prefix + "empty_line_action", ValidationAction.EXCLUDE),
+            error_limit=d.get(prefix + "error_limit", cls.ERROR_LIMIT_DEFAULT),
+            fill_short_lines=d.get(prefix + "fill_short_lines", False),
+            force_column_names=d.get(prefix + "force_column_names", None),
+            gzip_in_parallel=d.get(prefix + "gzip_in_parallel", False),
+            gzip_queue_size=d.get(prefix + "gzip_queue_size", KgtkReaderOptions.GZIP_QUEUE_SIZE_DEFAULT),
+            header_error_action=d.get(prefix + "header_error_action", ValidationAction.EXIT),
+            invalid_value_action=d.get(prefix + "invalid_value_action", ValidationAction.REPORT),
+            long_line_action=d.get(prefix + "long_line_action", ValidationAction.EXCLUDE),
+            mode=reader_mode,
+            short_line_action=d.get(prefix + "short_line_action", ValidationAction.EXCLUDE),
+            skip_first_record=d.get(prefix + "skip_first_record", False),
+            truncate_long_lines=d.get(prefix + "truncate_long_lines", False),
+            unsafe_column_name_action=d.get(prefix + "unsafe_column_name_action", ValidationAction.REPORT),
+            whitespace_line_action=d.get(prefix + "whitespace_line_action", ValidationAction.EXCLUDE),
+        )
+
+    @classmethod
+    # Build the value parsing option structure.
+    def from_args(cls,
+                  args: Namespace,
+                  who: str = "",
+                  mode: typing.Optional[KgtkReaderMode] = None,
+                  )->'KgtkReaderOptions':
+        return cls.from_dict(vars(args), who=who, mode=mode)
+
+DEFAULT_KGTK_READER_OPTIONS: KgtkReaderOptions = KgtkReaderOptions()
+
+
+@attr.s(slots=True, frozen=False)
+class KgtkReader(KgtkBase, ClosableIter[typing.List[str]]):
+    file_path: typing.Optional[Path] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(Path)))
+    source: ClosableIter[str] = attr.ib() # Todo: validate
+
+    options: KgtkReaderOptions = attr.ib(validator=attr.validators.instance_of(KgtkReaderOptions))
+
+    value_options: KgtkValueOptions = attr.ib(validator=attr.validators.instance_of(KgtkValueOptions))
+
+    column_names: typing.List[str] = attr.ib(validator=attr.validators.deep_iterable(member_validator=attr.validators.instance_of(str),
+                                                                                     iterable_validator=attr.validators.instance_of(list)))
+    # For convenience, the count of columns. This is the same as len(column_names).
+    column_count: int = attr.ib(validator=attr.validators.instance_of(int))
+
+    column_name_map: typing.Mapping[str, int] = attr.ib(validator=attr.validators.deep_mapping(key_validator=attr.validators.instance_of(str),
+                                                                                               value_validator=attr.validators.instance_of(int)))
+
+    # The index of the mandatory columns.
-1 means missing: + node1_column_idx: int = attr.ib(validator=attr.validators.instance_of(int), default=-1) # edge file + node2_column_idx: int = attr.ib(validator=attr.validators.instance_of(int), default=-1) # edge file + label_column_idx: int = attr.ib(validator=attr.validators.instance_of(int), default=-1) # edge file + id_column_idx: int = attr.ib(validator=attr.validators.instance_of(int), default=-1) # node file + + data_lines_read: int = attr.ib(validator=attr.validators.instance_of(int), default=0) + data_lines_passed: int = attr.ib(validator=attr.validators.instance_of(int), default=0) + data_lines_ignored: int = attr.ib(validator=attr.validators.instance_of(int), default=0) + data_errors_reported: int = attr.ib(validator=attr.validators.instance_of(int), default=0) + # Is this an edge file or a node file? is_edge_file: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) is_node_file: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) + # Feedback and error output: + error_file: typing.TextIO = attr.ib(default=sys.stderr) verbose: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) very_verbose: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) - class Mode(Enum): - """ - There are four file reading modes: - """ - NONE = 0 # Enforce neither edge nore node file required columns - EDGE = 1 # Enforce edge file required columns - NODE = 2 # Enforce node file require columns - AUTO = 3 # Automatically decide whether to enforce edge or node file required columns + @classmethod + def _default_options( + cls, + options: typing.Optional[KgtkReaderOptions] = None, + value_options: typing.Optional[KgtkValueOptions] = None, + )->typing.Tuple[KgtkReaderOptions, KgtkValueOptions]: + # Supply the default reader and value options: + if options is None: + options = DEFAULT_KGTK_READER_OPTIONS + if value_options is None: + value_options = DEFAULT_KGTK_VALUE_OPTIONS + + return (options, value_options) @classmethod def open(cls, file_path: typing.Optional[Path], - force_column_names: typing.Optional[typing.List[str]] = None, - skip_first_record: bool = False, - fill_short_lines: bool = False, - truncate_long_lines: bool = False, error_file: typing.TextIO = sys.stderr, - error_limit: int = ERROR_LIMIT_DEFAULT, - empty_line_action: ValidationAction = ValidationAction.EXCLUDE, - comment_line_action: ValidationAction = ValidationAction.EXCLUDE, - whitespace_line_action: ValidationAction = ValidationAction.EXCLUDE, - blank_line_action: ValidationAction = ValidationAction.EXCLUDE, - blank_node1_line_action: typing.Optional[ValidationAction] = None, - blank_node2_line_action: typing.Optional[ValidationAction] = None, - blank_id_line_action: typing.Optional[ValidationAction] = None, - short_line_action: ValidationAction = ValidationAction.EXCLUDE, - long_line_action: ValidationAction = ValidationAction.EXCLUDE, - invalid_value_action: ValidationAction = ValidationAction.REPORT, - header_error_action: ValidationAction = ValidationAction.EXIT, - unsafe_column_name_action: ValidationAction = ValidationAction.REPORT, + options: typing.Optional[KgtkReaderOptions] = None, value_options: typing.Optional[KgtkValueOptions] = None, - compression_type: typing.Optional[str] = None, - gzip_in_parallel: bool = False, - gzip_queue_size: int = GZIP_QUEUE_SIZE_DEFAULT, - column_separator: str = KgtkFormat.COLUMN_SEPARATOR, - mode: Mode = Mode.AUTO, verbose: bool = False, very_verbose: bool = False)->"KgtkReader": """ Opens a KGTK 
file, which may be an edge file or a node file. The appropriate reader is returned. """ - source: ClosableIter[str] = cls._openfile(file_path, - compression_type=compression_type, - gzip_in_parallel=gzip_in_parallel, - gzip_queue_size=gzip_queue_size, - error_file=error_file, - verbose=verbose) + + # Supply the default reader and value options: + (options, value_options) = cls._default_options(options, value_options) + + source: ClosableIter[str] = cls._openfile(file_path, options=options, error_file=error_file, verbose=verbose) # Read the kgtk file header and split it into column names. We get the # header back, too, for use in debugging and error messages. header: str column_names: typing.List[str] - (header, column_names) = cls._build_column_names(source, - force_column_names=force_column_names, - skip_first_record=skip_first_record, - column_separator=column_separator, - error_file=error_file, - verbose=verbose) + (header, column_names) = cls._build_column_names(source, options, error_file=error_file, verbose=verbose) # Check for unsafe column names. cls.check_column_names(column_names, header_line=header, - error_action=unsafe_column_name_action, + error_action=options.unsafe_column_name_action, error_file=error_file) # Build a map from column name to column index. column_name_map: typing.Mapping[str, int] = cls.build_column_name_map(column_names, header_line=header, - error_action=header_error_action, + error_action=options.header_error_action, error_file=error_file) # Should we automatically determine if this is an edge file or a node file? is_edge_file: bool = False is_node_file: bool = False - if mode is KgtkReader.Mode.AUTO: + if options.mode is KgtkReaderMode.AUTO: # If we have a node1 (or alias) column, then this must be an edge file. Otherwise, assume it is a node file. node1_idx: int = cls.get_column_idx(cls.NODE1_COLUMN_NAMES, column_name_map, header_line=header, - error_action=header_error_action, + error_action=options.header_error_action, error_file=error_file, is_optional=True) if node1_idx >= 0: @@ -206,11 +352,11 @@ def open(cls, if verbose: print("node1 column not found, assuming this is a KGTK node file", file=error_file, flush=True) - elif mode is KgtkReader.Mode.EDGE: + elif options.mode is KgtkReaderMode.EDGE: is_edge_file = True - elif mode is KgtkReader.Mode.NODE: + elif options.mode is KgtkReaderMode.NODE: is_node_file = True - elif mode is KgtkReader.Mode.NONE: + elif options.mode is KgtkReaderMode.NONE: pass if is_edge_file: @@ -224,50 +370,23 @@ def open(cls, label_column_idx: int (node1_column_idx, node2_column_idx, label_column_idx) = cls.required_edge_columns(column_name_map, header_line=header, - error_action=header_error_action, + error_action=options.header_error_action, error_file=error_file) if verbose: print("KgtkReader: Reading an edge file. 
node1=%d label=%d node2=%d" % (node1_column_idx, label_column_idx, node2_column_idx), file=error_file, flush=True) - # Apply the proper defaults to the blank node1, node2, and id actions: - if blank_node1_line_action is None: - blank_node1_line_action = blank_line_action - if blank_node2_line_action is None: - blank_node2_line_action = blank_line_action - if blank_id_line_action is None: - blank_id_line_action = ValidationAction.PASS - return EdgeReader(file_path=file_path, source=source, - column_separator=column_separator, column_names=column_names, column_name_map=column_name_map, column_count=len(column_names), node1_column_idx=node1_column_idx, node2_column_idx=node2_column_idx, label_column_idx=label_column_idx, - force_column_names=force_column_names, - skip_first_record=skip_first_record, - fill_short_lines=fill_short_lines, - truncate_long_lines=truncate_long_lines, error_file=error_file, - error_limit=error_limit, - empty_line_action=empty_line_action, - comment_line_action=comment_line_action, - whitespace_line_action=whitespace_line_action, - blank_node1_line_action=blank_node1_line_action, - blank_node2_line_action=blank_node2_line_action, - blank_id_line_action=blank_id_line_action, - short_line_action=short_line_action, - long_line_action=long_line_action, - invalid_value_action=invalid_value_action, - header_error_action=header_error_action, - unsafe_column_name_action=unsafe_column_name_action, + options=options, value_options=value_options, - compression_type=compression_type, - gzip_in_parallel=gzip_in_parallel, - gzip_queue_size=gzip_queue_size, is_edge_file=is_edge_file, is_node_file=is_node_file, verbose=verbose, @@ -281,89 +400,35 @@ def open(cls, # Get the index of the required column: id_column_idx: int = cls.required_node_column(column_name_map, header_line=header, - error_action=header_error_action, + error_action=options.header_error_action, error_file=error_file) if verbose: print("KgtkReader: Reading an node file. 
id=%d" % (id_column_idx), file=error_file, flush=True) - # Apply the proper defaults to the blank node1, node2, and id actions: - if blank_node1_line_action is None: - blank_node1_line_action = ValidationAction.PASS - if blank_node2_line_action is None: - blank_node2_line_action = ValidationAction.PASS - if blank_id_line_action is None: - blank_id_line_action = blank_line_action - return NodeReader(file_path=file_path, source=source, - column_separator=column_separator, column_names=column_names, column_name_map=column_name_map, column_count=len(column_names), id_column_idx=id_column_idx, - force_column_names=force_column_names, - skip_first_record=skip_first_record, - fill_short_lines=fill_short_lines, - truncate_long_lines=truncate_long_lines, error_file=error_file, - error_limit=error_limit, - empty_line_action=empty_line_action, - comment_line_action=comment_line_action, - whitespace_line_action=whitespace_line_action, - blank_node1_line_action=blank_node1_line_action, - blank_node2_line_action=blank_node2_line_action, - blank_id_line_action=blank_id_line_action, - short_line_action=short_line_action, - long_line_action=long_line_action, - invalid_value_action=invalid_value_action, - header_error_action=header_error_action, - unsafe_column_name_action=unsafe_column_name_action, + options=options, value_options=value_options, - compression_type=compression_type, - gzip_in_parallel=gzip_in_parallel, - gzip_queue_size=gzip_queue_size, is_edge_file=is_edge_file, is_node_file=is_node_file, verbose=verbose, very_verbose=very_verbose, ) else: - # Apply the proper defaults to the blank node1, node2, and id actions: - if blank_node1_line_action is None: - blank_node1_line_action = ValidationAction.PASS - if blank_node2_line_action is None: - blank_node2_line_action = ValidationAction.PASS - if blank_id_line_action is None: - blank_id_line_action = ValidationAction.PASS - return cls(file_path=file_path, source=source, - column_separator=column_separator, column_names=column_names, column_name_map=column_name_map, column_count=len(column_names), - force_column_names=force_column_names, - skip_first_record=skip_first_record, - fill_short_lines=fill_short_lines, - truncate_long_lines=truncate_long_lines, error_file=error_file, - error_limit=error_limit, - empty_line_action=empty_line_action, - comment_line_action=comment_line_action, - whitespace_line_action=whitespace_line_action, - blank_node1_line_action=blank_node1_line_action, - blank_node2_line_action=blank_node2_line_action, - blank_id_line_action=blank_id_line_action, - short_line_action=short_line_action, - long_line_action=long_line_action, - invalid_value_action=invalid_value_action, - header_error_action=header_error_action, - unsafe_column_name_action=unsafe_column_name_action, + options=options, value_options=value_options, - compression_type=compression_type, - gzip_in_parallel=gzip_in_parallel, - gzip_queue_size=gzip_queue_size, is_edge_file=is_edge_file, is_node_file=is_node_file, verbose=verbose, @@ -404,16 +469,15 @@ def _open_compressed_file(cls, raise ValueError("%s: Unexpected compression_type '%s'" % (who, compression_type)) @classmethod - def _openfile(cls, file_path: typing.Optional[Path], - compression_type: typing.Optional[str], - gzip_in_parallel: bool, - gzip_queue_size: int, + def _openfile(cls, + file_path: typing.Optional[Path], + options: KgtkReaderOptions, error_file: typing.TextIO, verbose: bool)->ClosableIter[str]: who: str = cls.__name__ if file_path is None or str(file_path) == "-": - if compression_type is not 
None and len(compression_type) > 0: - return ClosableIterTextIOWrapper(cls._open_compressed_file(compression_type, "-", sys.stdin, who, error_file, verbose)) + if options.compression_type is not None and len(options.compression_type) > 0: + return ClosableIterTextIOWrapper(cls._open_compressed_file(options.compression_type, "-", sys.stdin, who, error_file, verbose)) else: if verbose: print("%s: reading stdin" % who, file=error_file, flush=True) @@ -423,8 +487,8 @@ def _openfile(cls, file_path: typing.Optional[Path], print("%s: File_path.suffix: %s" % (who, file_path.suffix), file=error_file, flush=True) gzip_file: typing.TextIO - if compression_type is not None and len(compression_type) > 0: - gzip_file = cls._open_compressed_file(compression_type, str(file_path), file_path, who, error_file, verbose) + if options.compression_type is not None and len(options.compression_type) > 0: + gzip_file = cls._open_compressed_file(options.compression_type, str(file_path), file_path, who, error_file, verbose) elif file_path.suffix in [".bz2", ".gz", ".lz4", ".xz"]: gzip_file = cls._open_compressed_file(file_path.suffix, str(file_path), file_path, who, error_file, verbose) else: @@ -432,8 +496,8 @@ def _openfile(cls, file_path: typing.Optional[Path], print("%s: reading file %s" % (who, str(file_path))) return ClosableIterTextIOWrapper(open(file_path, "r")) - if gzip_in_parallel: - gzip_thread: GunzipProcess = GunzipProcess(gzip_file, Queue(gzip_queue_size)) + if options.gzip_in_parallel: + gzip_thread: GunzipProcess = GunzipProcess(gzip_file, Queue(options.gzip_queue_size)) gzip_thread.start() return gzip_thread else: @@ -443,9 +507,7 @@ def _openfile(cls, file_path: typing.Optional[Path], @classmethod def _build_column_names(cls, source: ClosableIter[str], - force_column_names: typing.Optional[typing.List[str]], - skip_first_record: bool, - column_separator: str, + options: KgtkReaderOptions, error_file: typing.TextIO, verbose: bool = False, )->typing.Tuple[str, typing.List[str]]: @@ -453,7 +515,7 @@ def _build_column_names(cls, Read the kgtk file header and split it into column names. """ column_names: typing.List[str] - if force_column_names is None: + if options.force_column_names is None: # Read the column names from the first line, stripping end-of-line characters. # # TODO: if the read fails, throw a more useful exception with the line number. @@ -465,18 +527,18 @@ def _build_column_names(cls, print("header: %s" % header, file=error_file, flush=True) # Split the first line into column names. - return header, header.split(column_separator) + return header, header.split(options.column_separator) else: # Skip the first record to override the column names in the file. # Do not skip the first record if the file does not hae a header record. - if skip_first_record: + if options.skip_first_record: try: next(source) except StopIteration: raise ValueError("No header line to skip") # Use the forced column names. 
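The branch ending here is the forced-column-names path of _build_column_names, just before the return statements below. A usage sketch (the file name is hypothetical, and a node1/label/node2 header is assumed so that AUTO mode detects an edge file):

    from pathlib import Path
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions

    # Override a wrong header row: supply the names and skip the bad record.
    options = KgtkReaderOptions(force_column_names=["node1", "label", "node2"],
                                skip_first_record=True)
    kr = KgtkReader.open(Path("mislabeled.tsv"), options=options)
    print(kr.column_names)   # ['node1', 'label', 'node2']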
- return column_separator.join(force_column_names), force_column_names + return options.column_separator.join(options.force_column_names), options.force_column_names def close(self): self.source.close() @@ -503,7 +565,7 @@ def exclude_line(self, action: ValidationAction, msg: str, line: str)->bool: print("In input data line %d, %s: %s" % (self.data_lines_read, msg, line), file=self.error_file, flush=True) self.data_errors_reported += 1 - if self.error_limit > 0 and self.data_errors_reported >= self.error_limit: + if self.options.error_limit > 0 and self.data_errors_reported >= self.options.error_limit: raise ValueError("Too many data errors, exiting.") return result @@ -534,44 +596,44 @@ def nextrow(self)-> typing.List[str]: print("'%s'" % line, file=self.error_file, flush=True) # Ignore empty lines. - if self.empty_line_action != ValidationAction.PASS and len(line) == 0: - if self.exclude_line(self.empty_line_action, "saw an empty line", line): + if self.options.empty_line_action != ValidationAction.PASS and len(line) == 0: + if self.exclude_line(self.options.empty_line_action, "saw an empty line", line): continue # Ignore comment lines: - if self.comment_line_action != ValidationAction.PASS and line[0] == self.COMMENT_INDICATOR: - if self.exclude_line(self.comment_line_action, "saw a comment line", line): + if self.options.comment_line_action != ValidationAction.PASS and line[0] == self.COMMENT_INDICATOR: + if self.exclude_line(self.options.comment_line_action, "saw a comment line", line): continue # Ignore whitespace lines - if self.whitespace_line_action != ValidationAction.PASS and line.isspace(): - if self.exclude_line(self.whitespace_line_action, "saw a whitespace line", line): + if self.options.whitespace_line_action != ValidationAction.PASS and line.isspace(): + if self.exclude_line(self.options.whitespace_line_action, "saw a whitespace line", line): continue - row = line.split(self.column_separator) + row = line.split(self.options.column_separator) # Optionally fill missing trailing columns with empty row: - if self.fill_short_lines and len(row) < self.column_count: + if self.options.fill_short_lines and len(row) < self.column_count: while len(row) < self.column_count: row.append("") # Optionally remove extra trailing columns: - if self.truncate_long_lines and len(row) > self.column_count: + if self.options.truncate_long_lines and len(row) > self.column_count: row = row[:self.column_count] # Optionally validate that the line contained the right number of columns: # # When we report line numbers in error messages, line 1 is the first line after the header line. 
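Before the short- and long-line checks resume below, a sketch of how the repair flags above interact with them: filling and truncating happen first, so repaired rows never trigger the exclusion actions. The file name is hypothetical:

    from pathlib import Path
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions

    # Pad short rows and trim long ones instead of excluding them:
    options = KgtkReaderOptions(fill_short_lines=True, truncate_long_lines=True)
    kr = KgtkReader.open(Path("ragged.tsv"), options=options)
    for row in kr:
        assert len(row) == kr.column_count   # every yielded row is rectangular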
- if self.short_line_action != ValidationAction.PASS and len(row) < self.column_count: - if self.exclude_line(self.short_line_action, + if self.options.short_line_action != ValidationAction.PASS and len(row) < self.column_count: + if self.exclude_line(self.options.short_line_action, "Required %d columns, saw %d: '%s'" % (self.column_count, len(row), line), line): continue - if self.long_line_action != ValidationAction.PASS and len(row) > self.column_count: - if self.exclude_line(self.long_line_action, + if self.options.long_line_action != ValidationAction.PASS and len(row) > self.column_count: + if self.exclude_line(self.options.long_line_action, "Required %d columns, saw %d (%d extra): '%s'" % (self.column_count, len(row), len(row) - self.column_count, @@ -582,7 +644,7 @@ def nextrow(self)-> typing.List[str]: if self._ignore_if_blank_fields(row, line): continue - if self.invalid_value_action != ValidationAction.PASS: + if self.options.invalid_value_action != ValidationAction.PASS: # TODO: find a way to optionally cache the KgtkValue objects # so we don't have to create them a second time in the conversion # and iterator methods below. @@ -634,11 +696,10 @@ def to_kgtk_values(self, row: typing.List[str], validate: bool = False)->typing. When validate is True, validate each KgtkValue object. """ - options: KgtkValueOptions = self.value_options if self.value_options is not None else DEFAULT_KGTK_VALUE_OPTIONS results: typing.List[KgtkValue] = [ ] field: str for field in row: - kv = KgtkValue(field, options=options) + kv = KgtkValue(field, options=self.value_options) if validate: kv.validate() results.append(kv) @@ -663,14 +724,13 @@ def to_concise_kgtk_values(self, row: typing.List[str], validate: bool = False)- When validate is True, validate each KgtkValue object. """ - options: KgtkValueOptions = self.value_options if self.value_options is not None else DEFAULT_KGTK_VALUE_OPTIONS results: typing.List[typing.Optional[KgtkValue]] = [ ] field: str for field in row: if len(field) == 0: results.append(None) else: - kv = KgtkValue(field, options=options) + kv = KgtkValue(field, options=self.value_options) if validate: kv.validate() results.append(kv) @@ -733,7 +793,6 @@ def to_kgtk_value_dict(self, row: typing.List[str], validate: bool=False, concis When validate is True, validate each KgtkValue object. """ - options: KgtkValueOptions = self.value_options if self.value_options is not None else DEFAULT_KGTK_VALUE_OPTIONS results: typing.MutableMapping[str, KgtkValue] = { } idx: int = 0 field: str @@ -741,7 +800,7 @@ def to_kgtk_value_dict(self, row: typing.List[str], validate: bool=False, concis if concise and len(field) == 0: pass # Skip the empty field. else: - kv = KgtkValue(field, options=options) + kv = KgtkValue(field, options=self.value_options) if validate: kv.validate() results[self.column_names[idx]] = kv @@ -771,20 +830,19 @@ def _ignore_invalid_values(self, values: typing.List[str], line: str)->bool: Returns True to indicate that the row should be ignored (skipped). """ - options: KgtkValueOptions = self.value_options if self.value_options is not None else DEFAULT_KGTK_VALUE_OPTIONS problems: typing.List[str] = [ ] # Build a list of problems. idx: int value: str for idx, value in enumerate(values): if len(value) > 0: # Optimize the common case of empty columns. 
- kv: KgtkValue = KgtkValue(value, options=options) + kv: KgtkValue = KgtkValue(value, options=self.value_options) if not kv.is_valid(): problems.append("col %d (%s) value '%s'is an %s" % (idx, self.column_names[idx], value, kv.describe())) if len(problems) == 0: return False - return self.exclude_line(self.invalid_value_action, + return self.exclude_line(self.options.invalid_value_action, "; ".join(problems), line) @@ -822,7 +880,7 @@ def merge_columns(self, additional_columns: typing.List[str])->typing.List[str]: return merged_columns @classmethod - def add_operation_arguments(cls, parser: ArgumentParser): + def add_debug_arguments(cls, parser: ArgumentParser): errors_to = parser.add_mutually_exclusive_group() errors_to.add_argument( "--errors-to-stdout", dest="errors_to_stdout", help="Send errors to stdout instead of stderr", action="store_true") @@ -833,129 +891,6 @@ def add_operation_arguments(cls, parser: ArgumentParser): parser.add_argument( "--very-verbose", dest="very_verbose", help="Print additional progress messages.", action='store_true') - @classmethod - def add_file_arguments(cls, - parser: ArgumentParser, - node_options: bool = False, - edge_options: bool = False, - mode_options: bool = False, - optional_file: bool = True, - who: str = ""): - prefix1: str = "--" if len(who) == 0 else "--" + who + "-" - prefix2: str = "" if len(who) == 0 else who + "_" - prefix3: str = "" if len(who) == 0 else who + " " - - if optional_file: - parser.add_argument(dest=prefix2 + "kgtk_file", help="The " + who + " KGTK file to read", type=Path, nargs="?") - else: - parser.add_argument(dest=prefix2 + "kgtk_file", help="The " + who + " KGTK file to read", type=Path) - - fgroup: _ArgumentGroup = parser.add_argument_group(prefix3 + "File options", - "Options affecting " + prefix3 + "processing") - fgroup.add_argument(prefix1 + "column-separator", - dest=prefix2 + "column_separator", - help="Column separator.", type=str, default=cls.COLUMN_SEPARATOR) - - fgroup.add_argument(prefix1 + "compression-type", - dest=prefix2 + "compression_type", help="Specify the compression type.") - - fgroup.add_argument(prefix1 + "error-limit", - dest=prefix2 + "error_limit", - help="The maximum number of errors to report before failing", type=int, default=cls.ERROR_LIMIT_DEFAULT) - - fgroup.add_argument(prefix1 + "gzip-in-parallel", - dest=prefix2 + "gzip_in_parallel", help="Execute gzip in parallel.", action='store_true') - - fgroup.add_argument(prefix1 + "gzip-queue-size", - dest=prefix2 + "gzip_queue_size", - help="Queue size for parallel gzip.", type=int, default=cls.GZIP_QUEUE_SIZE_DEFAULT) - - if mode_options: - fgroup.add_argument(prefix1 + "mode", - dest=prefix2 + "mode", - help="Determine the KGTK file mode.", - type=KgtkReader.Mode, action=EnumNameAction, default=KgtkReader.Mode.AUTO) - - hgroup: _ArgumentGroup = parser.add_argument_group(prefix3 + "Header parsing", "Options affecting header parsing") - - hgroup.add_argument(prefix1 + "force-column-names", - dest=prefix2 + "force_column_names", help="Force the column names.", nargs='+') - - hgroup.add_argument(prefix1 + "header-error-action", - dest=prefix2 + "header_error_action", - help="The action to take when a header error is detected Only ERROR or EXIT are supported.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXIT) - - hgroup.add_argument(prefix1 + "skip-first-record", - dest=prefix2 + "skip_first_record", - help="Skip the first record when forcing column names.", action='store_true') - - hgroup.add_argument(prefix1 + 
"unsafe-column-name-action", - dest=prefix2 + "unsafe_column_name_action", - help="The action to take when a column name is unsafe.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.REPORT) - - lgroup: _ArgumentGroup = parser.add_argument_group("Line parsing", "Options affecting data line parsing") - - if node_options: - lgroup.add_argument(prefix1 + "blank-id-line-action", - dest=prefix2 + "blank_id_line_action", - help="The action to take when a blank id field is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - if edge_options: - lgroup.add_argument(prefix1 + "blank-node1-line-action", - dest=prefix2 + "blank_node1_line_action", - help="The action to take when a blank node1 field is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - lgroup.add_argument(prefix1 + "blank-node2-line-action", - dest=prefix2 + "blank_node2_line_action", - help="The action to take when a blank node2 field is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - lgroup.add_argument(prefix1 + "blank-required-field-line-action", - dest=prefix2 + "blank_line_action", - help="The action to take when a line with a blank node1, node2, or id field (per mode) is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - lgroup.add_argument(prefix1 + "comment-line-action", - dest=prefix2 + "comment_line_action", - help="The action to take when a comment line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - lgroup.add_argument(prefix1 + "empty-line-action", - dest=prefix2 + "empty_line_action", - help="The action to take when an empty line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - lgroup.add_argument(prefix1 + "fill-short-lines", - dest=prefix2 + "fill_short_lines", - help="Fill missing trailing columns in short lines with empty values.", action='store_true') - - lgroup.add_argument(prefix1 + "invalid-value-action", - dest=prefix2 + "invalid_value_action", - help="The action to take when a data cell value is invalid.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.REPORT) - - lgroup.add_argument(prefix1 + "long-line-action", - dest=prefix2 + "long_line_action", - help="The action to take when a long line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - lgroup.add_argument(prefix1 + "short-line-action", - dest=prefix2 + "short_line_action", - help="The action to take when a short line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - lgroup.add_argument(prefix1 + "truncate-long-lines", - dest=prefix2 + "truncate_long_lines", - help="Remove excess trailing columns in long lines.", action='store_true') - - lgroup.add_argument(prefix1 + "whitespace-line-action", - dest=prefix2 + "whitespace_line_action", - help="The action to take when a whitespace line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - def main(): """ Test the KGTK file reader. 
@@ -966,50 +901,32 @@ def main(): from kgtk.io.nodereader import NodeReader parser = ArgumentParser() - KgtkReader.add_operation_arguments(parser) - KgtkReader.add_file_arguments(parser, node_options=True, edge_options=True, mode_options=True) - KgtkValueOptions.add_arguments(parser) - + parser.add_argument(dest="kgtk_file", help="The KGTK file to read", type=Path, nargs="?") + KgtkReader.add_debug_arguments(parser) parser.add_argument( "--test", dest="test_method", help="The test to perform", choices=["rows", "concise-rows", "kgtk-values", "concise-kgtk-values", "dicts", "concise-dicts", "kgtk-value-dicts", "concise-kgtk-value-dicts"], default="rows") - parser.add_argument( "--test-valdate", dest="test_validate", help="Validate KgtkValue objects in test.", action='store_true') + parser.add_argument( "--test-validate", dest="test_validate", help="Validate KgtkValue objects in test.", action='store_true') + + KgtkReaderOptions.add_arguments(parser, mode_options=True) + KgtkValueOptions.add_arguments(parser) args = parser.parse_args() error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr - # Build the value parsing option structure. + # Build the option structures. + reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args) value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) kr: KgtkReader = KgtkReader.open(args.kgtk_file, - force_column_names=args.force_column_names, - skip_first_record=args.skip_first_record, - fill_short_lines=args.fill_short_lines, - truncate_long_lines=args.truncate_long_lines, error_file = error_file, - error_limit=args.error_limit, - empty_line_action=args.empty_line_action, - comment_line_action=args.comment_line_action, - whitespace_line_action=args.whitespace_line_action, - blank_line_action=args.blank_line_action, - blank_node1_line_action=args.blank_node1_line_action, - blank_node2_line_action=args.blank_node2_line_action, - blank_id_line_action=args.blank_id_line_action, - short_line_action=args.short_line_action, - long_line_action=args.long_line_action, - invalid_value_action=args.invalid_value_action, - header_error_action=args.header_error_action, - unsafe_column_name_action=args.unsafe_column_name_action, + options=reader_options, value_options=value_options, - compression_type=args.compression_type, - gzip_in_parallel=args.gzip_in_parallel, - gzip_queue_size=args.gzip_queue_size, - column_separator=args.column_separator, - mode=args.mode, - verbose=args.verbose, very_verbose=args.very_verbose) + verbose=args.verbose, + very_verbose=args.very_verbose) line_count: int = 0 row: typing.List[str] @@ -1019,49 +936,49 @@ def main(): kgtk_value_dict: typing.Mapping[str, str] if args.test_method == "rows": if args.verbose: - print("Testing iterating over rows.", flush=True) + print("Testing iterating over rows.", file=error_file, flush=True) for row in kr: line_count += 1 elif args.test_method == "concise-rows": if args.verbose: - print("Testing iterating over concise rows.", flush=True) + print("Testing iterating over concise rows.", file=error_file, flush=True) for row in kr.concise_rows(): line_count += 1 elif args.test_method == "kgtk-values": if args.verbose: - print("Testing iterating over KgtkValue rows.", flush=True) + print("Testing iterating over KgtkValue rows.", file=error_file, flush=True) for kgtk_values in kr.kgtk_values(validate=args.test_validate): line_count += 1 elif args.test_method == "concise-kgtk-values": if args.verbose: - print("Testing iterating over concise KgtkValue rows.", flush=True) 
+ print("Testing iterating over concise KgtkValue rows.", file=error_file, flush=True) for kgtk_values in kr.concise_kgtk_values(validate=args.test_validate): line_count += 1 elif args.test_method == "dicts": if args.verbose: - print("Testing iterating over dicts.", flush=True) + print("Testing iterating over dicts.", file=error_file, flush=True) for dict_row in kr.dicts(): line_count += 1 elif args.test_method == "concise-dicts": if args.verbose: - print("Testing iterating over concise dicts.", flush=True) + print("Testing iterating over concise dicts.", file=error_file, flush=True) for dict_row in kr.dicts(concise=True): line_count += 1 elif args.test_method == "kgtk-value-dicts": if args.verbose: - print("Testing iterating over KgtkValue dicts.", flush=True) + print("Testing iterating over KgtkValue dicts.", file=error_file, flush=True) for kgtk_value_dict in kr.kgtk_value_dicts(validate=args.test_validate): line_count += 1 elif args.test_method == "concise-kgtk-value-dicts": if args.verbose: - print("Testing iterating over concise KgtkValue dicts.", flush=True) + print("Testing iterating over concise KgtkValue dicts.", file=error_file, flush=True) for kgtk_value_dict in kr.kgtk_value_dicts(concise=True, validate=args.test_validate): line_count += 1 @@ -1069,3 +986,4 @@ def main(): if __name__ == "__main__": main() + diff --git a/kgtk/io/nodereader.py b/kgtk/io/nodereader.py index b65d30a07..56702a73a 100644 --- a/kgtk/io/nodereader.py +++ b/kgtk/io/nodereader.py @@ -10,7 +10,7 @@ import sys import typing -from kgtk.io.kgtkreader import KgtkReader +from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions from kgtk.utils.closableiter import ClosableIter from kgtk.utils.enumnameaction import EnumNameAction from kgtk.utils.validationaction import ValidationAction @@ -22,60 +22,37 @@ class NodeReader(KgtkReader): @classmethod def open_node_file(cls, file_path: typing.Optional[Path], - force_column_names: typing.Optional[typing.List[str]] = None, # - skip_first_record: bool = False, - fill_short_lines: bool = False, - truncate_long_lines: bool = False, error_file: typing.TextIO = sys.stderr, - error_limit: int = KgtkReader.ERROR_LIMIT_DEFAULT, - empty_line_action: ValidationAction = ValidationAction.EXCLUDE, - comment_line_action: ValidationAction = ValidationAction.EXCLUDE, - whitespace_line_action: ValidationAction = ValidationAction.EXCLUDE, - blank_id_line_action: ValidationAction = ValidationAction.EXCLUDE, - short_line_action: ValidationAction = ValidationAction.EXCLUDE, - long_line_action: ValidationAction = ValidationAction.EXCLUDE, - invalid_value_action: ValidationAction = ValidationAction.REPORT, - header_error_action: ValidationAction = ValidationAction.EXIT, - unsafe_column_name_action: ValidationAction = ValidationAction.REPORT, + options: typing.Optional[KgtkReaderOptions] = None, value_options: typing.Optional[KgtkValueOptions] = None, - compression_type: typing.Optional[str] = None, - gzip_in_parallel: bool = False, - gzip_queue_size: int = KgtkReader.GZIP_QUEUE_SIZE_DEFAULT, - column_separator: str = KgtkReader.COLUMN_SEPARATOR, verbose: bool = False, very_verbose: bool = False)->"NodeReader": - source: ClosableIter[str] = cls._openfile(file_path, - compression_type=compression_type, - gzip_in_parallel=gzip_in_parallel, - gzip_queue_size=gzip_queue_size, - error_file=error_file, - verbose=verbose) + # Supply the default reader and value options: + (options, value_options) = cls._default_options(options, value_options) - # Read the node file header and split it into 
column names.
+        source: ClosableIter[str] = cls._openfile(file_path, options=options, error_file=error_file, verbose=verbose)
+
+        # Read the node file header and split it into column names.
         header: str
         column_names: typing.List[str]
-        (header, column_names) = cls._build_column_names(source,
-                                                         force_column_names=force_column_names,
-                                                         skip_first_record=skip_first_record,
-                                                         column_separator=column_separator,
-                                                         error_file=error_file,
-                                                         verbose=verbose)
+        (header, column_names) = cls._build_column_names(source, options=options, error_file=error_file, verbose=verbose)
+
         # Check for unsafe column names.
         cls.check_column_names(column_names,
                                header_line=header,
-                               error_action=unsafe_column_name_action,
+                               error_action=options.unsafe_column_name_action,
                                error_file=error_file)
 
         # Build a map from column name to column index.
         column_name_map: typing.Mapping[str, int] = cls.build_column_name_map(column_names,
                                                                               header_line=header,
-                                                                              error_action=header_error_action,
+                                                                              error_action=options.header_error_action,
                                                                               error_file=error_file)
 
         # Get the index of the required column.
         id_column_idx: int = cls.required_node_column(column_name_map,
                                                       header_line=header,
-                                                      error_action=header_error_action,
+                                                      error_action=options.header_error_action,
                                                       error_file=error_file)
 
         if verbose:
@@ -83,44 +60,27 @@ def open_node_file(cls,
 
         return cls(file_path=file_path,
                    source=source,
-                   column_separator=column_separator,
                    column_names=column_names,
                    column_name_map=column_name_map,
                    column_count=len(column_names),
                    id_column_idx=id_column_idx,
-                   force_column_names=force_column_names,
-                   skip_first_record=skip_first_record,
-                   fill_short_lines=fill_short_lines,
-                   truncate_long_lines=truncate_long_lines,
                    error_file=error_file,
-                   error_limit=error_limit,
-                   empty_line_action=empty_line_action,
-                   comment_line_action=comment_line_action,
-                   whitespace_line_action=whitespace_line_action,
-                   blank_id_line_action=blank_id_line_action,
-                   short_line_action=short_line_action,
-                   long_line_action=long_line_action,
-                   invalid_value_action=invalid_value_action,
-                   header_error_action=header_error_action,
-                   unsafe_column_name_action=unsafe_column_name_action,
+                   options=options,
                    value_options=value_options,
-                   compression_type=compression_type,
-                   gzip_in_parallel=gzip_in_parallel,
-                   gzip_queue_size=gzip_queue_size,
                    is_edge_file=False,
                    is_node_file=True,
                    verbose=verbose,
                    very_verbose=very_verbose,
                    )
 
-    def _ignore_if_blank_fields(self, values: typing.List[str], line: str)->bool:
+    def _ignore_if_blank_required_fields(self, values: typing.List[str], line: str)->bool:
         # Ignore line_action with blank id fields. This code comes after
         # filling missing trailing columns, although it could be reworked
         # to come first.
-        if self.blank_id_line_action != ValidationAction.PASS and self.id_column_idx >= 0 and len(values) > self.id_column_idx:
+        if self.options.blank_required_field_line_action != ValidationAction.PASS and self.id_column_idx >= 0 and len(values) > self.id_column_idx:
             id_value: str = values[self.id_column_idx]
             if len(id_value) == 0 or id_value.isspace():
-                return self.exclude_line(self.blank_id_line_action, "id is blank", line)
+                return self.exclude_line(self.options.blank_required_field_line_action, "id is blank", line)
         return False # Do not ignore this line
 
     def _skip_reserved_fields(self, column_name)->bool:
@@ -133,43 +93,28 @@
 def main():
     """
     Test the KGTK node file reader.
""" parser = ArgumentParser() - KgtkReader.add_operation_arguments(parser) - KgtkReader.add_file_arguments(parser, node_options=True) + parser.add_argument(dest="kgtk_file", help="The KGTK edge file to read", type=Path, nargs="?") + KgtkReader.add_debug_arguments(parser) + KgtkReaderOptions.add_arguments(parser) KgtkValueOptions.add_arguments(parser) args = parser.parse_args() error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr - # Build the value parsing option structure. + # Build the option structures. + reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args, mode=KgtkReaderMode.NODE) value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) - er: NodeReader = NodeReader.open(args.kgtk_file, - force_column_names=args.force_column_names, - skip_first_record=args.skip_first_record, - fill_short_lines=args.fill_short_lines, - truncate_long_lines=args.truncate_long_lines, - error_file=error_file, - error_limit=args.error_limit, - empty_line_action=args.empty_line_action, - comment_line=args.comment_line_action, - whitespace_line_action=args.whitespace_line_action, - blank_id_line_action=args.blank_id_line_action, - short_line_action=args.short_line_action, - long_line_action=args.long_line_action, - invalid_value_action=args.invalid_value_action, - header_error_action=args.header_error_action, - unsafe_column_name_action=args.unsafe_column_name_action, - value_options=value_options, - compression_type=args.compression_type, - gzip_in_parallel=args.gzip_in_parallel, - gzip_queue_size=args.gzip_queue_size, - column_separator=args.column_separator, - mode=KgtkReader.Mode.NODE, - verbose=args.verbose, very_verbose=args.very_verbose) + nr: NodeReader = NodeReader.open_edge_file(args.kgtk_file, + error_file=error_file, + options=reader_options, + value_options=value_options, + column_separator=args.column_separator, + verbose=args.verbose, very_verbose=args.very_verbose) line_count: int = 0 row: typing.List[str] - for row in er: + for row in nr: line_count += 1 print("Read %d lines" % line_count) From 2dc66d82b3def5d3baee565f24cc3bbd2264fa4e Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Mon, 11 May 2020 12:17:50 -0700 Subject: [PATCH 136/278] Continuing conversion to KgtkReaderOptions. --- kgtk/cli/clean_data.py | 163 ++++++---------------------------------- kgtk/cli/ifexists.py | 84 +++++++++------------ kgtk/cli/ifnotexists.py | 88 +++++++++------------- kgtk/cli/validate.py | 6 +- kgtk/join/ifexists.py | 132 ++++++++++++++++---------------- kgtk/join/kgtkjoiner.py | 91 ++++++++-------------- 6 files changed, 197 insertions(+), 367 deletions(-) diff --git a/kgtk/cli/clean_data.py b/kgtk/cli/clean_data.py index bb4059d6b..e52a4f518 100644 --- a/kgtk/cli/clean_data.py +++ b/kgtk/cli/clean_data.py @@ -2,17 +2,17 @@ Copy a KGTK file, validating it and producing a clean KGTK file (no comments, whitespace lines, etc.) as output. +TODO: Need KgtkWriterOptions. 
+ """ from pathlib import Path import sys import typing -from kgtk.kgtkformat import KgtkFormat -from kgtk.io.kgtkreader import KgtkReader +from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions from kgtk.io.kgtkwriter import KgtkWriter -from kgtk.utils.enumnameaction import EnumNameAction -from kgtk.utils.validationaction import ValidationAction +from kgtk.value.kgtkvalueoptions import KgtkValueOptions def parser(): return { @@ -26,120 +26,21 @@ def add_arguments(parser): Args: parser (argparse.ArgumentParser) """ - parser.add_argument( "input_file", nargs="?", help="The KGTK file to read. May be omitted or '-' for stdin.", type=Path,) - + parser.add_argument( "input_file", nargs="?", help="The KGTK file to read. May be omitted or '-' for stdin.", type=Path) parser.add_argument( "output_file", nargs="?", help="The KGTK file to write. May be omitted or '-' for stdout.", type=Path) - parser.add_argument( "--blank-id-line-action", dest="blank_id_line_action", - help="The action to take when a blank id field is detected.", - type=ValidationAction, action=EnumNameAction, default=None) - - parser.add_argument( "--blank-node1-line-action", dest="blank_node1_line_action", - help="The action to take when a blank node1 field is detected.", - type=ValidationAction, action=EnumNameAction, default=None) - - parser.add_argument( "--blank-node2-line-action", dest="blank_node2_line_action", - help="The action to take when a blank node2 field is detected.", - type=ValidationAction, action=EnumNameAction, default=None) - - parser.add_argument( "--blank-required-field-line-action", dest="blank_line_action", - help="The action to take when a line with a blank node1, node2, or id field (per mode) is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - parser.add_argument( "--column-separator", dest="column_separator", - help="Column separator.", type=str, default=KgtkFormat.COLUMN_SEPARATOR) - - parser.add_argument( "--comment-line-action", dest="comment_line_action", - help="The action to take when a comment line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - parser.add_argument( "--empty-line-action", dest="empty_line_action", - help="The action to take when an empty line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - errors_to = parser.add_mutually_exclusive_group() - errors_to.add_argument( "--errors-to-stdout", dest="errors_to_stdout", - help="Send errors to stdout instead of stderr (default)", action="store_true") - errors_to.add_argument( "--errors-to-stderr", dest="errors_to_stderr", - help="Send errors to stderr instead of stdout", action="store_true") - - parser.add_argument( "--error-limit", dest="error_limit", - help="The maximum number of errors to report before failing", type=int, default=KgtkReader.ERROR_LIMIT_DEFAULT) - - parser.add_argument( "--fill-short-lines", dest="fill_short_lines", - help="Fill missing trailing columns in short lines with empty values.", action='store_true') - - parser.add_argument( "--force-column-names", dest="force_column_names", help="Force the column names.", nargs='+') - - parser.add_argument( "--gzip-in-parallel", dest="gzip_in_parallel", help="Execute gzip in parallel.", action='store_true') - - parser.add_argument( "--gzip-queue-size", dest="gzip_queue_size", - help="Queue size for parallel gzip.", type=int, default=KgtkReader.GZIP_QUEUE_SIZE_DEFAULT) - - parser.add_argument( "--header-error-action", 
dest="header_error_action", - help="The action to take when a header error is detected Only ERROR or EXIT are supported.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXIT) - - parser.add_argument( "--input-compression", dest="input_compression_type", help="Specify the input file compression type, otherwise use the extension.") - - parser.add_argument( "--input-mode", dest="input_mode", - help="Determine the KGTK input file mode.", type=KgtkReader.Mode, action=EnumNameAction, default=KgtkReader.Mode.AUTO) - - parser.add_argument( "--long-line-action", dest="long_line_action", - help="The action to take when a long line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - # Not yet implemented: - # parser.add_argument( "--output-compression", dest="input_compression_type", help="Specify the input file compression type, otherwise use the extension.") - - parser.add_argument( "--output-mode", dest="output_mode", - help="Determine the KGTK output file mode.", type=KgtkWriter.Mode, action=EnumNameAction, default=KgtkWriter.Mode.AUTO) - - parser.add_argument( "--short-line-action", dest="short_line_action", - help="The action to take whe a short line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - parser.add_argument( "--skip-first-record", dest="skip_first_record", help="Skip the first record when forcing column names.", action='store_true') - - parser.add_argument( "--truncate-long-lines", dest="truncate_long_lines", - help="Remove excess trailing columns in long lines.", action='store_true') - - parser.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true') - - parser.add_argument( "--very-verbose", dest="very_verbose", help="Print additional progress messages.", action='store_true') - - parser.add_argument( "--whitespace-line-action", dest="whitespace_line_action", - help="The action to take when a whitespace line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + KgtkReader.add_debug_arguments(parser) + KgtkReaderOptions.add_arguments(parser, mode_options=True) + KgtkValueOptions.add_arguments(parser) def run(input_file: typing.Optional[Path], output_file: typing.Optional[Path], - force_column_names: typing.Optional[typing.List[str]] = None, - skip_first_record: bool = False, - fill_short_lines: bool = False, - truncate_long_lines: bool = False, errors_to_stdout: bool = False, - error_limit: int = KgtkReader.ERROR_LIMIT_DEFAULT, - empty_line_action: ValidationAction = ValidationAction.EXCLUDE, - comment_line_action: ValidationAction = ValidationAction.EXCLUDE, - whitespace_line_action: ValidationAction = ValidationAction.EXCLUDE, - blank_line_action: ValidationAction = ValidationAction.EXCLUDE, - blank_node1_line_action: typing.Optional[ValidationAction] = None, - blank_node2_line_action: typing.Optional[ValidationAction] = None, - blank_id_line_action: typing.Optional[ValidationAction] = None, - short_line_action: ValidationAction = ValidationAction.EXCLUDE, - long_line_action: ValidationAction = ValidationAction.EXCLUDE, - header_error_action: ValidationAction = ValidationAction.EXIT, - input_compression_type: typing.Optional[str] = None, - # output_compression_type: typing.Optional[str] = None, # Not yet implemented - gzip_in_parallel: bool = False, - gzip_queue_size: int = KgtkReader.GZIP_QUEUE_SIZE_DEFAULT, - column_separator: str = 
KgtkFormat.COLUMN_SEPARATOR, - input_mode: KgtkReader.Mode = KgtkReader.Mode.AUTO, - output_mode: KgtkWriter.Mode = KgtkWriter.Mode.AUTO, + errors_to_stderr: bool = False, verbose: bool = False, very_verbose: bool = False, + **kwargs # Whatever KgtkReaderOptions and KgtkValueOptions want. )->int: # import modules locally from kgtk.exceptions import KGTKException @@ -147,48 +48,30 @@ def run(input_file: typing.Optional[Path], # Select where to send error messages, defaulting to stderr. error_file: typing.TextIO = sys.stdout if errors_to_stdout else sys.stderr + # Build the option structures. + reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs) + value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs) + if verbose: if input_file is not None: - print("Cleaning data from '%s'" % str(input_file), file=error_file) + print("Cleaning data from '%s'" % str(input_file), file=error_file, flush=True) else: - print ("Cleaning data from stdin", file=error_file) + print ("Cleaning data from stdin", file=error_file, flush=True) if output_file is not None: - print("Writing data to '%s'" % str(output_file), file=error_file) + print("Writing data to '%s'" % str(output_file), file=error_file, flush=True) else: - print ("Writing data to stdin", file=error_file) + print ("Writing data to stdin", file=error_file, flush=True) try: kr: KgtkReader = KgtkReader.open(input_file, - force_column_names=force_column_names, - skip_first_record=skip_first_record, - fill_short_lines=fill_short_lines, - truncate_long_lines=truncate_long_lines, error_file=error_file, - error_limit=error_limit, - empty_line_action=empty_line_action, - comment_line_action=comment_line_action, - whitespace_line_action=whitespace_line_action, - blank_line_action=blank_line_action, - blank_node1_line_action=blank_node1_line_action, - blank_node2_line_action=blank_node2_line_action, - blank_id_line_action=blank_id_line_action, - short_line_action=short_line_action, - long_line_action=long_line_action, - compression_type=input_compression_type, - header_error_action=header_error_action, - gzip_in_parallel=gzip_in_parallel, - gzip_queue_size=gzip_queue_size, - column_separator=column_separator, - mode=input_mode, - verbose=verbose, very_verbose=very_verbose) + options=reader_options, + value_options=value_options, + verbose=verbose, + very_verbose=very_verbose) kw: KgtkWriter = KgtkWriter.open(kr.column_names, output_file, - header_error_action=header_error_action, - gzip_in_parallel=gzip_in_parallel, - gzip_queue_size=gzip_queue_size, - column_separator=column_separator, - mode=output_mode, verbose=verbose, very_verbose=very_verbose) line_count: int = 0 @@ -199,7 +82,7 @@ def run(input_file: typing.Optional[Path], kw.close() if verbose: - print("Copied %d clean data lines" % line_count, file=error_file) + print("Copied %d clean data lines" % line_count, file=error_file, flush=True) return 0 except Exception as e: diff --git a/kgtk/cli/ifexists.py b/kgtk/cli/ifexists.py index d6c2a8b23..112ff7eda 100644 --- a/kgtk/cli/ifexists.py +++ b/kgtk/cli/ifexists.py @@ -1,17 +1,16 @@ """Filter a KGTK file based on whether one or more records exist in a second KGTK file with matching values for one or more fields. 
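The cleaning command above reduces to a read-validate-write pass: open a reader, copy the rows that survive validation, count them, and report on the error stream. A minimal standalone sketch of that shape using only the standard library (the file names and the bare TSV handling are illustrative; the real command goes through KgtkReader and KgtkWriter):

    import csv
    import sys

    def copy_clean(in_path: str, out_path: str) -> int:
        # Copy TSV rows, excluding rows whose column count disagrees with the header.
        line_count: int = 0
        with open(in_path, newline="") as src, open(out_path, "w", newline="") as dst:
            reader = csv.reader(src, delimiter="\t")
            writer = csv.writer(dst, delimiter="\t")
            header = next(reader)
            writer.writerow(header)
            for row in reader:
                if len(row) != len(header):
                    continue  # drop the row, mirroring ValidationAction.EXCLUDE
                writer.writerow(row)
                line_count += 1
        print("Copied %d clean data lines" % line_count, file=sys.stderr, flush=True)
        return 0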
+ +TODO: Need KgtkWriterOptions """ from pathlib import Path import sys import typing -from kgtk.kgtkformat import KgtkFormat -from kgtk.io.kgtkreader import KgtkReader +from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions from kgtk.io.kgtkwriter import KgtkWriter from kgtk.join.ifexists import IfExists -from kgtk.utils.enumnameaction import EnumNameAction -from kgtk.utils.validationaction import ValidationAction from kgtk.value.kgtkvalueoptions import KgtkValueOptions def parser(): @@ -26,34 +25,21 @@ def add_arguments(parser): Args: parser (argparse.ArgumentParser) """ - parser.add_argument( "input_kgtk_file", nargs="?", help="The KGTK file to filter ('left' file). May be omitted or '-' for stdin.", type=Path) + parser.add_argument( "input_kgtk_file", nargs="?", help="The KGTK file to filter. May be omitted or '-' for stdin.", type=Path) - parser.add_argument( "--filter-on", dest="filter_kgtk_file", help="The KGTK file to filter against ('right' file).", type=Path, required=True) + parser.add_argument( "--filter-on", dest="_filter_kgtk_file", help="The KGTK file to filter against.", type=Path, required=True) parser.add_argument("-o", "--output-file", dest="output_kgtk_file", help="The KGTK file to write", type=Path, default=None) - parser.add_argument( "--left-keys", dest="left_keys", help="The key columns in the file being filtered.", nargs='*') - - parser.add_argument( "--right-keys", dest="right_keys", help="The key columns in the filter-on file.", nargs='*') - + parser.add_argument( "--input-keys", "--left-keys", dest="input_keys", help="The key columns in the file being filtered.", nargs='*') - # A subset of common arguments: - errors_to = parser.add_mutually_exclusive_group() - errors_to.add_argument( "--errors-to-stdout", dest="errors_to_stdout", - help="Send errors to stdout instead of stderr (default)", action="store_true") - errors_to.add_argument( "--errors-to-stderr", dest="errors_to_stderr", - help="Send errors to stderr instead of stdout", action="store_true") + parser.add_argument( "--filter-keys", "--right-keys", dest="filter_keys", help="The key columns in the filter-on file.", nargs='*') - parser.add_argument( "--error-limit", dest="error_limit", - help="The maximum number of errors to report before failing", type=int, default=KgtkReader.ERROR_LIMIT_DEFAULT) + parser.add_argument( "--field-separator", dest="field_separator", help="Separator for multifield keys", default=IfExists.FIELD_SEPARATOR_DEFAULT) - parser.add_argument( "--field-separator", dest="field_separator", - help="Field separator.", type=str, default=IfExists.FIELD_SEPARATOR_DEFAULT) - - parser.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true') - - parser.add_argument( "--very-verbose", dest="very_verbose", help="Print additional progress messages.", action='store_true') - + KgtkReader.add_file_arguments(parser, mode_options=True, who="input") + KgtkReader.add_file_arguments(parser, mode_options=True, who="filter") + KgtkValueOptions.add_arguments(parser) # Note: Any arguments described by KgtkValueOptions.add_arguments(...) 
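The who= parameter is what lets two readers (input and filter) share one command line: the same option set is registered twice, once per prefix. A rough sketch of that convention with plain argparse, mirroring the prefix1/prefix2 derivation that appears in KgtkReaderOptions.add_arguments later in this series (the --error-limit option and its default are illustrative):

    from argparse import ArgumentParser

    def add_reader_arguments(parser: ArgumentParser, who: str = "") -> None:
        # An empty who adds bare options; a non-empty who namespaces them.
        prefix1: str = "--" if len(who) == 0 else "--" + who + "-"
        prefix2: str = "" if len(who) == 0 else who + "_"
        parser.add_argument(prefix1 + "error-limit", dest=prefix2 + "error_limit",
                            type=int, default=1000)

    parser = ArgumentParser()
    add_reader_arguments(parser, who="input")
    add_reader_arguments(parser, who="filter")
    args = parser.parse_args(["--input-error-limit", "5"])
    assert args.input_error_limit == 5 and args.filter_error_limit == 1000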
@@ -64,45 +50,45 @@ def add_arguments(parser): def run(input_kgtk_file: typing.Optional[Path], filter_kgtk_file: Path, output_kgtk_file: typing.Optional[Path], - left_keys: typing.Optional[typing.List[str]], - right_keys: typing.Optional[typing.List[str]], + input_keys: typing.Optional[typing.List[str]], + filter_keys: typing.Optional[typing.List[str]], - # Some common arguments: - errors_to_stdout: bool = False, - errors_to_stderr: bool = False, - error_limit: int = KgtkReader.ERROR_LIMIT_DEFAULT, field_separator: str = IfExists.FIELD_SEPARATOR_DEFAULT, + + errors_to_stdout: bool = False, + errors_to_stderr: bool = True, verbose: bool = False, very_verbose: bool = False, - **kwargs # Whatever KgtkValueOptions wants. + **kwargs # Whatever KgtkFileOptions and KgtkValueOptions want. )->int: # import modules locally from kgtk.exceptions import KGTKException - if input_kgtk_file is None: - input_kgtk_file = Path("-") - # Select where to send error messages, defaulting to stderr. - # (Not used yet) - error_file: typing.TextIO = sys.stderr if errors_to_stderr else sys.stdout + error_file: typing.TextIO = sys.stdout if errors_to_stdout else sys.stderr - # Build the value parsing option structure. + # Build the option structures. + input_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs, who="input") + filter_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs, who="filter") value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs) try: - ie: IfExists = IfExists(left_file_path=input_kgtk_file, - right_file_path=filter_kgtk_file, - output_path=output_kgtk_file, - left_keys=left_keys, - right_keys=right_keys, - field_separator=field_separator, - invalid_value_action=ValidationAction.PASS, - value_options=value_options, - error_limit=error_limit, - verbose=verbose, - very_verbose=very_verbose) + ie: IfExists = IfExists( + input_file_path=input_kgtk_file, + input_keys=input_keys, + filter_file_path=filter_kgtk_file, + filter_keys=filter_keys, + output_file_path=output_kgtk_file, + field_separator=field_separator, + input_reader_options=input_reader_options, + filter_reader_options=filter_reader_options, + value_options=value_options, + error_file=error_file, + verbose=verbose, + very_verbose=very_verbose, + ) ie.process() diff --git a/kgtk/cli/ifnotexists.py b/kgtk/cli/ifnotexists.py index bd9f5a52b..481f2ccbc 100644 --- a/kgtk/cli/ifnotexists.py +++ b/kgtk/cli/ifnotexists.py @@ -1,18 +1,16 @@ -"""Filter a KGTK file based on whether one or more records do not exist in a -second KGTK file with matching values for one or more fields. +"""Filter a KGTK file based on whether one or more records exist in a second +KGTK file with matching values for one or more fields. +TODO: Need KgtkWriterOptions """ from pathlib import Path import sys import typing -from kgtk.kgtkformat import KgtkFormat -from kgtk.io.kgtkreader import KgtkReader +from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions from kgtk.io.kgtkwriter import KgtkWriter from kgtk.join.ifexists import IfExists -from kgtk.utils.enumnameaction import EnumNameAction -from kgtk.utils.validationaction import ValidationAction from kgtk.value.kgtkvalueoptions import KgtkValueOptions def parser(): @@ -27,34 +25,21 @@ def add_arguments(parser): Args: parser (argparse.ArgumentParser) """ - parser.add_argument( "input_kgtk_file", nargs="?", help="The KGTK file to filter ('left' file). 
May be omitted or '-' for stdin.", type=Path) + parser.add_argument( "input_kgtk_file", nargs="?", help="The KGTK file to filter. May be omitted or '-' for stdin.", type=Path) - parser.add_argument( "--filter-on", dest="filter_kgtk_file", help="The KGTK file to filter against ('right' file).", type=Path, required=True) + parser.add_argument( "--filter-on", dest="_filter_kgtk_file", help="The KGTK file to filter against.", type=Path, required=True) parser.add_argument("-o", "--output-file", dest="output_kgtk_file", help="The KGTK file to write", type=Path, default=None) - parser.add_argument( "--left-keys", dest="left_keys", help="The key columns in the file being filtered.", nargs='*') + parser.add_argument( "--input-keys", "--left-keys", dest="input_keys", help="The key columns in the file being filtered.", nargs='*') - parser.add_argument( "--right-keys", dest="right_keys", help="The key columns in the filter-on file.", nargs='*') + parser.add_argument( "--filter-keys", "--right-keys", dest="filter_keys", help="The key columns in the filter-on file.", nargs='*') + parser.add_argument( "--field-separator", dest="field_separator", help="Separator for multifield keys", default=IfExists.FIELD_SEPARATOR_DEFAULT) - # A subset of common arguments: - errors_to = parser.add_mutually_exclusive_group() - errors_to.add_argument( "--errors-to-stdout", dest="errors_to_stdout", - help="Send errors to stdout instead of stderr (default)", action="store_true") - errors_to.add_argument( "--errors-to-stderr", dest="errors_to_stderr", - help="Send errors to stderr instead of stdout", action="store_true") - - parser.add_argument( "--error-limit", dest="error_limit", - help="The maximum number of errors to report before failing", type=int, default=KgtkReader.ERROR_LIMIT_DEFAULT) - - parser.add_argument( "--field-separator", dest="field_separator", - help="Field separator.", type=str, default=IfExists.FIELD_SEPARATOR_DEFAULT) - - parser.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true') - - parser.add_argument( "--very-verbose", dest="very_verbose", help="Print additional progress messages.", action='store_true') - + KgtkReader.add_file_arguments(parser, mode_options=True, who="input") + KgtkReader.add_file_arguments(parser, mode_options=True, who="filter") + KgtkValueOptions.add_arguments(parser) # Note: Any arguments described by KgtkValueOptions.add_arguments(...) @@ -65,45 +50,46 @@ def add_arguments(parser): def run(input_kgtk_file: typing.Optional[Path], filter_kgtk_file: Path, output_kgtk_file: typing.Optional[Path], - left_keys: typing.Optional[typing.List[str]], - right_keys: typing.Optional[typing.List[str]], + input_keys: typing.Optional[typing.List[str]], + filter_keys: typing.Optional[typing.List[str]], - # Some common arguments: - errors_to_stdout: bool = False, - errors_to_stderr: bool = False, - error_limit: int = KgtkReader.ERROR_LIMIT_DEFAULT, field_separator: str = IfExists.FIELD_SEPARATOR_DEFAULT, + + errors_to_stdout: bool = False, + errors_to_stderr: bool = True, verbose: bool = False, very_verbose: bool = False, - **kwargs # Whatever KgtkValueOptions wants. + **kwargs # Whatever KgtkFileOptions and KgtkValueOptions want. )->int: # import modules locally from kgtk.exceptions import KGTKException - if input_kgtk_file is None: - input_kgtk_file = Path("-") - # Select where to send error messages, defaulting to stderr. 
- # (Not used yet) - error_file: typing.TextIO = sys.stderr if errors_to_stderr else sys.stdout + error_file: typing.TextIO = sys.stdout if errors_to_stdout else sys.stderr - # Build the value parsing option structure. + # Build the option structures. + input_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs, who="input") + filter_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs, who="filter") value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs) try: - ie: IfExists = IfExists(left_file_path=input_kgtk_file, - right_file_path=filter_kgtk_file, - output_path=output_kgtk_file, - invert=True, - left_keys=left_keys, - right_keys=right_keys, - field_separator=field_separator, - value_options=value_options, - error_limit=error_limit, - verbose=verbose, - very_verbose=very_verbose) + ie: IfExists = IfExists( + input_file_path=input_kgtk_file, + input_keys=input_keys, + filter_file_path=filter_kgtk_file, + filter_keys=filter_keys, + output_file_path=output_kgtk_file, + invert=True, + field_separator=field_separator, + input_reader_options=input_reader_options, + filter_reader_options=filter_reader_options, + value_options=value_options, + error_file=error_file, + verbose=verbose, + very_verbose=very_verbose, + ) ie.process() diff --git a/kgtk/cli/validate.py b/kgtk/cli/validate.py index 2b7c1a5dc..2fa013bfb 100644 --- a/kgtk/cli/validate.py +++ b/kgtk/cli/validate.py @@ -15,10 +15,7 @@ import sys import typing -from kgtk.kgtkformat import KgtkFormat from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions -from kgtk.utils.enumnameaction import EnumNameAction -from kgtk.utils.validationaction import ValidationAction from kgtk.value.kgtkvalueoptions import KgtkValueOptions def parser(): @@ -35,7 +32,6 @@ def add_arguments(parser): """ parser.add_argument( "kgtk_files", nargs="*", help="The KGTK file(s) to validate. May be omitted or '-' for stdin.", type=Path) - parser.add_argument( "--header-only", dest="header_only", help="Process the only the header of the input file.", action="store_true") @@ -50,7 +46,7 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], header_only: bool = False, verbose: bool = False, very_verbose: bool = False, - **kwargs # Whatever KgtkValueOptions wants. + **kwargs # Whatever KgtkReaderOptions and KgtkValueOptions want. 
)->int: # import modules locally from kgtk.exceptions import KGTKException diff --git a/kgtk/join/ifexists.py b/kgtk/join/ifexists.py index 9a19e5ef8..1e85e44fa 100644 --- a/kgtk/join/ifexists.py +++ b/kgtk/join/ifexists.py @@ -25,7 +25,7 @@ import typing from kgtk.kgtkformat import KgtkFormat -from kgtk.io.kgtkreader import KgtkReader +from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions from kgtk.io.kgtkwriter import KgtkWriter from kgtk.utils.enumnameaction import EnumNameAction from kgtk.utils.validationaction import ValidationAction @@ -33,11 +33,11 @@ @attr.s(slots=True, frozen=True) class IfExists(KgtkFormat): - input_reader_args: typing.Mapping[str, typing.Any] = attr.ib() + input_file_path: typing.Optional[Path] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(Path))) input_keys: typing.Optional[typing.List[str]] = attr.ib(validator=attr.validators.optional(attr.validators.deep_iterable(member_validator=attr.validators.instance_of(str), iterable_validator=attr.validators.instance_of(list)))) - filter_reader_args: typing.Mapping[str, typing.Any] = attr.ib() + filter_file_path: Path = attr.ib(validator=attr.validators.instance_of(Path)) filter_keys: typing.Optional[typing.List[str]] = attr.ib(validator=attr.validators.optional(attr.validators.deep_iterable(member_validator=attr.validators.instance_of(str), iterable_validator=attr.validators.instance_of(list)))) @@ -48,10 +48,13 @@ class IfExists(KgtkFormat): invert: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) - # TODO: find a working validator + # TODO: find working validators # value_options: typing.Optional[KgtkValueOptions] = attr.ib(attr.validators.optional(attr.validators.instance_of(KgtkValueOptions)), default=None) + input_reader_options: typing.Optional[KgtkReaderOptions]= attr.ib(default=None) + filter_reader_options: typing.Optional[KgtkReaderOptions]= attr.ib(default=None) value_options: typing.Optional[KgtkValueOptions] = attr.ib(default=None) + error_file: typing.TextIO = attr.ib(default=sys.stderr) verbose: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) very_verbose: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) @@ -74,11 +77,11 @@ def get_edge_key_columns(self, kr: KgtkReader, who: str)-> typing.List[int]: if not kr.is_edge_file: raise ValueError("get_edge_keys called on %s at wrong time." % who) if kr.node1_column_idx < 0: - raise ValueError("The node1 column is missing from the %s node file." % who) + raise ValueError("The node1 column is missing from the %s edge file." % who) if kr.label_column_idx < 0: - raise ValueError("The label column is missing from the %s node file." % who) + raise ValueError("The label column is missing from the %s edge file." % who) if kr.node2_column_idx < 0: - raise ValueError("The node2 column is missing from the %s node file." % who) + raise ValueError("The node2 column is missing from the %s edge file." 
% who) return [ kr.node1_column_idx, kr.label_column_idx, kr.node2_column_idx ] def get_supplied_key_columns(self, supplied_keys: typing.List[str], kr: KgtkReader, who: str)->typing.List[int]: @@ -89,7 +92,7 @@ def get_supplied_key_columns(self, supplied_keys: typing.List[str], kr: KgtkRead raise ValueError("Column %s is not in the %s file" % (key, who)) result.append(kr.column_name_map[key]) return result - + def get_key_columns(self, supplied_keys: typing.Optional[typing.List[str]], kr: KgtkReader, other_kr: KgtkReader, who: str)->typing.List[int]: if supplied_keys is not None and len(supplied_keys) > 0: return self.get_supplied_key_columns(supplied_keys, kr, who) @@ -121,79 +124,76 @@ def extract_key_set(self, kr: KgtkReader, who: str, key_columns: typing.List[int def process(self): # Open the input files once. if self.verbose: - print("Opening the input file: %s" % self.left_file_path, flush=True) - left_kr: KgtkReader = KgtkReader.open(self.left_file_path, - short_line_action=self.short_line_action, - long_line_action=self.long_line_action, - fill_short_lines=self.fill_short_lines, - truncate_long_lines=self.truncate_long_lines, - invalid_value_action=self.invalid_value_action, + if self.left_file_path is not None: + print("Opening the input file: %s" % self.input_file_path, file=self.error_file, flush=True) + else: + print("Reading the input data from stdin", file=self.error_file, flush=True) + + input_kr: KgtkReader = KgtkReader.open(self.input_file_path, + error_file=self.error_file, + options=self.input_reader_options, value_options = self.value_options, - error_limit=self.error_limit, verbose=self.verbose, very_verbose=self.very_verbose, ) if self.verbose: - print("Opening the right input file: %s" % self.right_file_path, flush=True) - right_kr: KgtkReader = KgtkReader.open(self.right_file_path, - short_line_action=self.short_line_action, - long_line_action=self.long_line_action, - fill_short_lines=self.fill_short_lines, - truncate_long_lines=self.truncate_long_lines, - invalid_value_action=self.invalid_value_action, - value_options = self.value_options, - error_limit=self.error_limit, - verbose=self.verbose, - very_verbose=self.very_verbose, + print("Opening the filter input file: %s" % self.filter_file_path, flush=True) + filter_kr: KgtkReader = KgtkReader.open(self.filter_file_path, + error_file=self.error_file, + options=self.filter_reader_options, + value_options=self.value_options, + verbose=self.verbose, + very_verbose=self.very_verbose, ) - left_key_columns: typing.List[int] = self.get_key_columns(self.left_keys, left_kr, right_kr, "left") - right_key_columns: typing.List[int] = self.get_key_columns(self.right_keys, right_kr, left_kr, "right") + input_key_columns: typing.List[int] = self.get_key_columns(self.input_keys, input_kr, filter_kr, "input") + filter_key_columns: typing.List[int] = self.get_key_columns(self.filter_keys, filter_kr, input_kr, "filter") - if len(left_key_columns) != len(right_key_columns): - print("There are %d left key columns but %d right key columns. Exiting." % (len(left_key_columns), len(right_key_columns)), flush=True) + if len(input_key_columns) != len(filter_key_columns): + print("There are %d input key columns but %d filter key columns. Exiting." 
% (len(input_key_columns), len(filter_key_columns)),
+ file=self.error_file, flush=True)
 return
 
 if self.verbose:
- print("Building the input key set from %s" % self.right_file_path, flush=True)
- key_set: typint.Set[str] = self.extract_key_set(right_kr, "right", right_key_columns)
+ print("Building the filter key set from %s" % self.filter_file_path, file=self.error_file, flush=True)
+ key_set: typing.Set[str] = self.extract_key_set(filter_kr, "filter", filter_key_columns)
 if self.verbose or self.very_verbose:
- print("There are %d entries in the key set." % len(key_set))
+ print("There are %d entries in the filter key set." % len(key_set), file=self.error_file, flush=True)
 if self.very_verbose:
- print("Keys: %s" % " ".join(key_set))
+ print("Keys: %s" % " ".join(key_set), file=self.error_file, flush=True)
 
 if self.verbose:
- print("Opening the output file: %s" % self.output_path, flush=True)
+ print("Opening the output file: %s" % self.output_path, file=self.error_file, flush=True)
 ew: KgtkWriter = KgtkWriter.open(input_kr.column_names,
 self.output_path,
 require_all_columns=False,
 prohibit_extra_columns=True,
 fill_missing_columns=True,
- gzip_in_parallel=self.gzip_in_parallel,
+ gzip_in_parallel=False,
 verbose=self.verbose,
 very_verbose=self.very_verbose)
 
 if self.verbose:
- print("Filtering records from %s" % self.left_file_path, flush=True)
+ print("Filtering records from %s" % self.input_file_path, file=self.error_file, flush=True)
 input_line_count: int = 0
 output_line_count: int = 0
 
 row: typing.List[str]
- for row in left_kr:
+ for row in input_kr:
 input_line_count += 1
- left_key: str = self.build_key(row, left_key_columns)
+ input_key: str = self.build_key(row, input_key_columns)
 
 if self.invert:
- if left_key not in key_set:
+ if input_key not in key_set:
 ew.write(row)
 output_line_count += 1
 else:
- if left_key in key_set:
+ if input_key in key_set:
 ew.write(row)
 output_line_count += 1
 
 if self.verbose:
- print("Read %d records, wrote %d records." % (input_line_count, output_line_count), flush=True)
+ print("Read %d records, wrote %d records." % (input_line_count, output_line_count), file=self.error_file, flush=True)
 
 ew.close()
 
@@ -202,7 +202,11 @@ def main():
 Test the KGTK file joiner.
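The process() loop above is the whole filter algorithm: materialize the filter file's keys into a set in one pass, then stream the input once, keeping (or, with invert, dropping) each row whose composite key is in the set. A self-contained sketch of that core, with lists standing in for the two KGTK files (the separator constant and all the names here are stand-ins for illustration):

    import typing

    FIELD_SEPARATOR: str = "|"  # stand-in for IfExists.FIELD_SEPARATOR_DEFAULT

    def build_key(row: typing.List[str], key_columns: typing.List[int]) -> str:
        # A multi-column key is the selected fields joined by the separator.
        return FIELD_SEPARATOR.join(row[idx] for idx in key_columns)

    def if_exists(input_rows, filter_rows, input_cols, filter_cols, invert=False):
        # Pass 1: materialize the filter keys; pass 2: stream and test membership.
        key_set = {build_key(row, filter_cols) for row in filter_rows}
        for row in input_rows:
            if (build_key(row, input_cols) in key_set) != invert:
                yield row

    rows = [["a", "p", "1"], ["b", "p", "2"]]
    assert list(if_exists(rows, [["a"]], [0], [0])) == [["a", "p", "1"]]
    assert list(if_exists(rows, [["a"]], [0], [0], invert=True)) == [["b", "p", "2"]]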
""" parser: ArgumentParser = ArgumentParser() - KgtkReader.add_operation_arguments(parser) + KgtkReader.add_debug_arguments(parser) + + parser.add_argument(dest="input_file", help="The KGTK file with the input data", type=Path, nargs="?") + + parser.add_argument( "--filter-on", dest="filter_file_path", help="The KGTK file with the filter data", type=Path, required=True) parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to read", type=Path, default=None) @@ -210,34 +214,36 @@ def main(): parser.add_argument( "--invert", dest="invert", help="Invert the test (if not exists).", action='store_true') - parser.add_argument( "--input-keys", dest="_input_keys", help="The key columns in the input file.", nargs='*') - parser.add_argument( "--filter-keys", dest="_filter_keys", help="The key columns in the filter file.", nargs='*') + parser.add_argument( "--input-keys", dest="input_keys", help="The key columns in the input file.", nargs='*') + parser.add_argument( "--filter-keys", dest="filter_keys", help="The key columns in the filter file.", nargs='*') KgtkReader.add_file_arguments(parser, mode_options=True, who="input") - - # TODO: Find a way to use "--filter-on" - KgtkReader.add_file_arguments(parser, mode_options=True, who="filter", optional_file=True) - + KgtkReader.add_file_arguments(parser, mode_options=True, who="filter") KgtkValueOptions.add_arguments(parser) args: Namespace = parser.parse_args() - input_args: typing.Mapping[str, typing.Any] = dict(((item[0][len("input_"):], item[1]) for item in vars(args) if item[0].startswith("input_"))) - filter_args: typing.Mapping[str, typing.Any] = dict(((item[0][len("filter_"):], item[1]) for item in vars(args) if item[0].startswith("filter_"))) + error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr - # Build the value parsing option structure. + # Build the option structures. 
+ input_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args, who="input") + filter_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args, who="filter") value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) - ie: IfExists = IfExists(input_reader_args=input_args, - input_keys=args._input_keys, - filter_reader_args=filter_args, - filter_keys=args._filter_keys, - output_file_path=args.output_file_path, - field_separator=args.field_separator, - invert=args.invert, - value_options=value_options, - verbose=args.verbose, - very_verbose=args.very_verbose) + ie: IfExists = IfExists( + input_file_path=args.input_file_path, + input_keys=args.input_keys, + filter_file_path=args.filter_file_path, + filter_keys=args.filter_keys, + output_file_path=args.output_file_path, + field_separator=args.field_separator, + invert=args.invert, + input_reader_options=input_reader_options, + filter_reader_options=filter_reader_options, + value_options=value_options, + error_file=error_file, + verbose=args.verbose, + very_verbose=args.very_verbose) ie.process() diff --git a/kgtk/join/kgtkjoiner.py b/kgtk/join/kgtkjoiner.py index 4855ac774..58f597740 100644 --- a/kgtk/join/kgtkjoiner.py +++ b/kgtk/join/kgtkjoiner.py @@ -15,7 +15,7 @@ import typing from kgtk.kgtkformat import KgtkFormat -from kgtk.io.kgtkreader import KgtkReader +from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions from kgtk.io.kgtkwriter import KgtkWriter from kgtk.utils.enumnameaction import EnumNameAction from kgtk.utils.validationaction import ValidationAction @@ -45,31 +45,24 @@ class KgtkJoiner(KgtkFormat): # The prefix applied to right file column names in the output file: prefix: typing.Optional[str] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(str)), default=None) - # The field separator used in multifield joins. The KGHT list character should be safe. + # The field separator used in multifield joins. The KGTK list character should be safe. # TODO: USE THE COLUMN SEPARATOR !!!!! field_separator: str = attr.ib(validator=attr.validators.instance_of(str), default=KgtkFormat.LIST_SEPARATOR) - # Ignore records with too many or too few fields? - short_line_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.EXCLUDE) - long_line_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.EXCLUDE) - - # Require or fill trailing fields? 
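KgtkJoiner now carries one reader-options structure per side, and the LEFT/RIGHT constants introduced below are what route each file open to the right structure (see the extract_join_key_set change further down). A tiny sketch of that routing, with dicts standing in for the option objects:

    import typing

    LEFT: str = "left"
    RIGHT: str = "right"

    def reader_options_for(who: str,
                           left_options: typing.Mapping[str, int],
                           right_options: typing.Mapping[str, int]) -> typing.Mapping[str, int]:
        # Each side of the join opens its file with its own option structure.
        return left_options if who == LEFT else right_options

    assert reader_options_for(LEFT, {"error_limit": 5}, {"error_limit": 9})["error_limit"] == 5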
- fill_short_lines: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) - truncate_long_lines: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) - - # TODO: find a working validator + # TODO: find working validators: + left_reader_options: typing.Optional[KgtkReaderOptions] = attr.ib(default=None) + right_reader_options: typing.Optional[KgtkReaderOptions] = attr.ib(default=None) # value_options: typing.Optional[KgtkValueOptions] = attr.ib(attr.validators.optional(attr.validators.instance_of(KgtkValueOptions)), default=None) value_options: typing.Optional[KgtkValueOptions] = attr.ib(default=None) - gzip_in_parallel: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) - - error_limit: int = attr.ib(validator=attr.validators.instance_of(int), default=KgtkReader.ERROR_LIMIT_DEFAULT) - verbose: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) very_verbose: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) FIELD_SEPARATOR_DEFAULT: str = KgtkFormat.LIST_SEPARATOR + LEFT: str = "left" + RIGHT: str = "right" + def node1_column_idx(self, kr: KgtkReader, who: str)->int: idx: int = kr.node1_column_idx if idx < 0: @@ -163,14 +156,15 @@ def build_join_idx_list(self, kr: KgtkReader, who: str, join_columns: typing.Opt def extract_join_key_set(self, file_path: Path, who: str, join_idx_list: typing.List[int])->typing.Set[str]: if self.verbose: print("Extracting the join key set from the %s input file: %s" % (who, str(file_path)), flush=True) + reader_options: typing.Optional[KgtkReaderOptions] + if who == self.LEFT: + reader_options = self.left_reader_options + else: + reader_options = self.right_reader_options + kr: KgtkReader = KgtkReader.open(file_path, - short_line_action=self.short_line_action, - long_line_action=self.long_line_action, - fill_short_lines=self.fill_short_lines, - truncate_long_lines=self.truncate_long_lines, + options=reader_options, value_options = self.value_options, - gzip_in_parallel=self.gzip_in_parallel, - error_limit=self.error_limit, verbose=self.verbose, very_verbose=self.very_verbose) @@ -196,7 +190,7 @@ def join_key_sets(self, left_join_idx_list: typing.List[int], right_join_idx_lis elif self.left_join and not self.right_join: if self.verbose: print("Computing the left join key set", flush=True) - join_key_set = self.extract_join_key_set(self.left_file_path, "left", left_join_idx_list).copy() + join_key_set = self.extract_join_key_set(self.left_file_path, self.LEFT, left_join_idx_list).copy() if self.verbose: print("There are %d keys in the left join key set." % len(join_key_set), flush=True) return join_key_set @@ -204,7 +198,7 @@ def join_key_sets(self, left_join_idx_list: typing.List[int], right_join_idx_lis elif self.right_join and not self.left_join: if self.verbose: print("Computing the right join key set", flush=True) - join_key_set = self.extract_join_key_set(self.right_file_path, "right", right_join_idx_list).copy() + join_key_set = self.extract_join_key_set(self.right_file_path, self.RIGHT, right_join_idx_list).copy() if self.verbose: print("There are %d keys in the right join key set." 
% len(join_key_set), flush=True) return join_key_set @@ -212,10 +206,10 @@ def join_key_sets(self, left_join_idx_list: typing.List[int], right_join_idx_lis else: if self.verbose: print("Computing the inner join key set", flush=True) - left_join_key_set: typing.Set[str] = self.extract_join_key_set(self.left_file_path, "left", left_join_idx_list) + left_join_key_set: typing.Set[str] = self.extract_join_key_set(self.left_file_path, self.LEFT, left_join_idx_list) if self.verbose: print("There are %d keys in the left file key set." % len(left_join_key_set), flush=True) - right_join_key_set: typing.Set[str] = self.extract_join_key_set(self.right_file_path, "right", right_join_idx_list) + right_join_key_set: typing.Set[str] = self.extract_join_key_set(self.right_file_path, self.RIGHT, right_join_idx_list) if self.verbose: print("There are %d keys in the right file key set." % len(right_join_key_set), flush=True) join_key_set = left_join_key_set.intersection(right_join_key_set) @@ -264,10 +258,7 @@ def process(self): if self.verbose: print("Opening the left edge file: %s" % str(self.left_file_path), flush=True) left_kr: KgtkReader = KgtkReader.open(self.left_file_path, - short_line_action=self.short_line_action, - long_line_action=self.long_line_action, - fill_short_lines=self.fill_short_lines, - truncate_long_lines=self.truncate_long_lines, + options=self.left_reader_options, value_options = self.value_options, error_limit=self.error_limit) @@ -275,10 +266,7 @@ def process(self): if self.verbose: print("Opening the right edge file: %s" % str(self.right_file_path), flush=True) right_kr: KgtkReader = KgtkReader.open(self.right_file_path, - short_line_action=self.short_line_action, - long_line_action=self.long_line_action, - fill_short_lines=self.fill_short_lines, - truncate_long_lines=self.truncate_long_lines, + options=self.right_reader_options, value_options = self.value_options, error_limit=self.error_limit) @@ -292,8 +280,8 @@ def process(self): print("Cannot join edge and node files.", flush=True) return - left_join_idx_list: typing.List[int] = self.build_join_idx_list(left_kr, "left", self.left_join_columns) - right_join_idx_list: typing.List[int] = self.build_join_idx_list(right_kr, "right", self.right_join_columns) + left_join_idx_list: typing.List[int] = self.build_join_idx_list(left_kr, self.LEFT, self.left_join_columns) + right_join_idx_list: typing.List[int] = self.build_join_idx_list(right_kr, self.RIGHT, self.right_join_columns) if len(left_join_idx_list) != len(right_join_idx_list): print("the left join key has %d components, the right join key has %d columns. Exiting." 
% (len(left_join_idx_list), len(right_join_idx_list)), flush=True) left_kr.close() @@ -322,7 +310,7 @@ def process(self): require_all_columns=False, prohibit_extra_columns=True, fill_missing_columns=True, - gzip_in_parallel=self.gzip_in_parallel, + gzip_in_parallel=False, verbose=self.verbose, very_verbose=self.very_verbose) @@ -385,41 +373,30 @@ def main(): parser = ArgumentParser() parser.add_argument(dest="left_file_path", help="The left KGTK file to join", type=Path) parser.add_argument(dest="right_file_path", help="The right KGTK file to join", type=Path) - parser.add_argument( "--error-limit", dest="error_limit", - help="The maximum number of errors to report before failing", type=int, default=KgtkReader.ERROR_LIMIT_DEFAULT) - parser.add_argument( "--field-separator", dest="field_separator", help="Separator for multifield keys", default=KgtkJoiner.FIELD_SEPARATOR_DEFAULT) - parser.add_argument( "--fill-short-lines", dest="fill_short_lines", - help="Fill missing trailing columns in short lines with empty values.", action='store_true') + parser.add_argument( "--join-on-label", dest="join_on_label", help="If both input files are edge files, include the label column in the join.", action='store_true') parser.add_argument( "--join-on-node2", dest="join_on_node2", help="If both input files are edge files, include the node2 column in the join.", action='store_true') - parser.add_argument( "--gzip-in-parallel", dest="gzip_in_parallel", help="Execute gzip in parallel.", action='store_true') parser.add_argument( "--left-file-join-columns", dest="left_join_columns", help="Left file join columns.", nargs='+') parser.add_argument( "--left-join", dest="left_join", help="Perform a left outer join.", action='store_true') - parser.add_argument( "--long-line-action", dest="long_line_action", - help="The action to take when a long line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to read", type=Path, default=None) parser.add_argument( "--prefix", dest="prefix", help="The prefix applied to right file column names in the output file.") parser.add_argument( "--right-file-join-columns", dest="right_join_columns", help="Right file join columns.", nargs='+') parser.add_argument( "--right-join", dest="right_join", help="Perform a right outer join.", action='store_true') - parser.add_argument( "--short-line-action", dest="short_line_action", - help="The action to take whe a short line is detected.", - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) - - parser.add_argument( "--truncate-long-lines", dest="truncate_long_lines", - help="Remove excess trailing columns in long lines.", action='store_true') parser.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true') parser.add_argument( "--very-verbose", dest="very_verbose", help="Print additional progress messages.", action='store_true') + KgtkReaderOptions.add_arguments(parser, mode_options=True, who=KgtkJoiner.LEFT) + KgtkReaderOptions.add_arguments(parser, mode_options=True, who=KgtkJoiner.RIGHT) KgtkValueOptions.add_arguments(parser) args = parser.parse_args() - # Build the value parsing option structure. + # Build the option structures. 
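Stripped of I/O, join_key_sets() above is set algebra: a left outer join keeps the left keys, a right outer join keeps the right keys, and an inner join keeps the intersection. A compact sketch over in-memory sets (the real code streams keys from the files; the full-outer branch is not shown in this hunk, so modeling it as None for "no key filtering" is an assumption):

    import typing

    def join_key_set(left_keys: typing.Set[str],
                     right_keys: typing.Set[str],
                     left_join: bool,
                     right_join: bool) -> typing.Optional[typing.Set[str]]:
        if left_join and right_join:
            return None  # full outer join: no key filtering needed (assumption)
        if left_join:
            return set(left_keys)   # left outer join keeps every left key
        if right_join:
            return set(right_keys)  # right outer join keeps every right key
        return left_keys & right_keys  # inner join keeps the intersection

    assert join_key_set({"a", "b"}, {"b", "c"}, False, False) == {"b"}
    assert join_key_set({"a", "b"}, {"b", "c"}, True, False) == {"a", "b"}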
+ left_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args, who=KgtkJoiner.LEFT) + right_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args, who=KgtkJoiner.RIGHT) value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) ej: KgtkJoiner = KgtkJoiner(left_file_path=args.left_file_path, @@ -433,13 +410,9 @@ def main(): right_join_columns=args.right_join_columns, prefix=args.prefix, field_separator=args.field_separator, - short_line_action=args.short_line_action, - long_line_action=args.long_line_action, - fill_short_lines=args.fill_short_lines, - truncate_long_lines=args.truncate_long_lines, + left_reader_options=left_reader_options, + right_reader_options=right_reader_options, value_options=value_options, - gzip_in_parallel=args.gzip_in_parallel, - error_limit=args.error_limit, verbose=args.verbose, very_verbose=args.very_verbose) From 743590fba06199de06320cf01d633e5dc4e5c76b Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Mon, 11 May 2020 12:20:36 -0700 Subject: [PATCH 137/278] Fix a namespace issue. --- kgtk/io/kgtkreader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kgtk/io/kgtkreader.py b/kgtk/io/kgtkreader.py index 7a358d53b..6672ef05d 100644 --- a/kgtk/io/kgtkreader.py +++ b/kgtk/io/kgtkreader.py @@ -63,7 +63,7 @@ class KgtkReaderOptions(): skip_first_record: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) # How do we handle errors? - error_limit: int = attr.ib(validator=attr.validators.instance_of(int), default=KgtkReaderOptions.ERROR_LIMIT_DEFAULT) # >0 ==> limit error reports + error_limit: int = attr.ib(validator=attr.validators.instance_of(int), default=ERROR_LIMIT_DEFAULT) # >0 ==> limit error reports # Ignore empty lines, comments, and all whitespace lines, etc.? empty_line_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.EXCLUDE) From 642f9bf6b5672c506cf054b3010c0835990e3674 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Mon, 11 May 2020 12:30:21 -0700 Subject: [PATCH 138/278] Add defaults to help. --- kgtk/io/kgtkreader.py | 46 +++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/kgtk/io/kgtkreader.py b/kgtk/io/kgtkreader.py index 6672ef05d..9ddd0b33f 100644 --- a/kgtk/io/kgtkreader.py +++ b/kgtk/io/kgtkreader.py @@ -107,91 +107,99 @@ def add_arguments(cls, "Options affecting " + prefix4 + "processing") fgroup.add_argument(prefix1 + "column-separator", dest=prefix2 + "column_separator", - help=prefix3 + "Column separator.", type=str, default=KgtkFormat.COLUMN_SEPARATOR) + help=prefix3 + "Column separator (default=).", # TODO: provide the default with escapes, e.g. 
\t + type=str, default=KgtkFormat.COLUMN_SEPARATOR) fgroup.add_argument(prefix1 + "compression-type", - dest=prefix2 + "compression_type", help=prefix3 + "Specify the compression type.") + dest=prefix2 + "compression_type", + help=prefix3 + "Specify the compression type (default=%(default)s).") fgroup.add_argument(prefix1 + "error-limit", dest=prefix2 + "error_limit", - help=prefix3 + "The maximum number of errors to report before failing", type=int, default=cls.ERROR_LIMIT_DEFAULT) + help=prefix3 + "The maximum number of errors to report before failing (default=%(default)s)", + type=int, default=cls.ERROR_LIMIT_DEFAULT) fgroup.add_argument(prefix1 + "gzip-in-parallel", - dest=prefix2 + "gzip_in_parallel", help=prefix3 + "Execute gzip in parallel.", action='store_true') + dest=prefix2 + "gzip_in_parallel", + help=prefix3 + "Execute gzip in parallel (default=%(default)s).", action='store_true') fgroup.add_argument(prefix1 + "gzip-queue-size", dest=prefix2 + "gzip_queue_size", - help=prefix3 + "Queue size for parallel gzip.", type=int, default=cls.GZIP_QUEUE_SIZE_DEFAULT) + help=prefix3 + "Queue size for parallel gzip (default=%(default)s).", + type=int, default=cls.GZIP_QUEUE_SIZE_DEFAULT) if mode_options: fgroup.add_argument(prefix1 + "mode", dest=prefix2 + "mode", - help=prefix3 + "Determine the KGTK file mode.", + help=prefix3 + "Determine the KGTK file mode (default=%(default)s).", type=KgtkReaderMode, action=EnumNameAction, default=KgtkReaderMode.AUTO) hgroup: _ArgumentGroup = parser.add_argument_group(prefix3 + "Header parsing", "Options affecting " + prefix4 + "header parsing") hgroup.add_argument(prefix1 + "force-column-names", dest=prefix2 + "force_column_names", - help=prefix3 + "Force the column names.", nargs='+') + help=prefix3 + "Force the column names (default=None).", + nargs='+') hgroup.add_argument(prefix1 + "header-error-action", dest=prefix2 + "header_error_action", - help=prefix3 + "The action to take when a header error is detected Only ERROR or EXIT are supported.", + help=prefix3 + "The action to take when a header error is detected. 
Only ERROR or EXIT are supported (default=%(default)s).", type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXIT) hgroup.add_argument(prefix1 + "skip-first-record", dest=prefix2 + "skip_first_record", - help=prefix3 + "Skip the first record when forcing column names.", action='store_true') + help=prefix3 + "Skip the first record when forcing column names (default=%(default)s).", action='store_true') hgroup.add_argument(prefix1 + "unsafe-column-name-action", dest=prefix2 + "unsafe_column_name_action", - help=prefix3 + "The action to take when a column name is unsafe.", + help=prefix3 + "The action to take when a column name is unsafe (default=%(default)s).", type=ValidationAction, action=EnumNameAction, default=ValidationAction.REPORT) lgroup: _ArgumentGroup = parser.add_argument_group("Line parsing", "Options affecting " + prefix4 + "data line parsing") lgroup.add_argument(prefix1 + "blank-required-field-line-action", dest=prefix2 + "blank_required_field_line_action", - help=prefix3 + "The action to take when a line with a blank node1, node2, or id field (per mode) is detected.", + help=prefix3 + "The action to take when a line with a blank node1, node2, or id field (per mode) is detected (default=%(default)s).", type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) lgroup.add_argument(prefix1 + "comment-line-action", dest=prefix2 + "comment_line_action", - help=prefix3 + "The action to take when a comment line is detected.", + help=prefix3 + "The action to take when a comment line is detected (default=%(default)s).", type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) lgroup.add_argument(prefix1 + "empty-line-action", dest=prefix2 + "empty_line_action", - help=prefix3 + "The action to take when an empty line is detected.", + help=prefix3 + "The action to take when an empty line is detected (default=%(default)s).", type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) lgroup.add_argument(prefix1 + "fill-short-lines", dest=prefix2 + "fill_short_lines", - help=prefix3 + "Fill missing trailing columns in short lines with empty values.", action='store_true') + help=prefix3 + "Fill missing trailing columns in short lines with empty values (default=%(default)s).", + action='store_true') lgroup.add_argument(prefix1 + "invalid-value-action", dest=prefix2 + "invalid_value_action", - help=prefix3 + "The action to take when a data cell value is invalid.", + help=prefix3 + "The action to take when a data cell value is invalid (default=%(default)s).", type=ValidationAction, action=EnumNameAction, default=ValidationAction.REPORT) lgroup.add_argument(prefix1 + "long-line-action", dest=prefix2 + "long_line_action", - help=prefix3 + "The action to take when a long line is detected.", + help=prefix3 + "The action to take when a long line is detected (default=%(default)s).", type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) lgroup.add_argument(prefix1 + "short-line-action", dest=prefix2 + "short_line_action", - help=prefix3 + "The action to take when a short line is detected.", + help=prefix3 + "The action to take when a short line is detected (default=%(default)s).", type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) lgroup.add_argument(prefix1 + "truncate-long-lines", dest=prefix2 + "truncate_long_lines", - help=prefix3 + "Remove excess trailing columns in long lines.", action='store_true') + help=prefix3 + "Remove excess trailing columns in long 
lines (default=%(default)s).", + action='store_true') lgroup.add_argument(prefix1 + "whitespace-line-action", dest=prefix2 + "whitespace_line_action", - help=prefix3 + "The action to take when a whitespace line is detected.", + help=prefix3 + "The action to take when a whitespace line is detected (default=%(default)s).", type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) @classmethod From c50bb9cb92c793885b0e6d30d87bbdff7a67cfeb Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Mon, 11 May 2020 12:38:46 -0700 Subject: [PATCH 139/278] Add default values to help message. --- kgtk/value/kgtkvalueoptions.py | 46 ++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/kgtk/value/kgtkvalueoptions.py b/kgtk/value/kgtkvalueoptions.py index a51b16dc7..b54046b29 100644 --- a/kgtk/value/kgtkvalueoptions.py +++ b/kgtk/value/kgtkvalueoptions.py @@ -77,25 +77,30 @@ def add_arguments(cls, parser: ArgumentParser, who: str = "", desc: str = "."): vgroup = parser.add_argument_group(prefix3 + "Data value parsing", "Options controlling the parsing and processing of KGTK data values" + desc) vgroup.add_argument( prefix1 + "additional-language-codes", dest=prefix2 + "additional_language_codes", - help=prefix3 + "Additional language codes.", nargs="*", default=None) + help=prefix3 + "Additional language codes (default=None).", + nargs="*", default=None) lsgroup= vgroup.add_mutually_exclusive_group() lsgroup.add_argument( prefix1 + "allow-language-suffixes", dest=prefix2 + "allow_language_suffixes", - help=prefix3 + "Allow language identifier suffixes starting with a dash.", action='store_true', default=True) + help=prefix3 + "Allow language identifier suffixes starting with a dash (default=%(default)s).", + action='store_true', default=True) lsgroup.add_argument( prefix1 + "disallow-language-suffixes", dest=prefix2 + "allow_language_suffixes", - help=prefix3 + "Disallow language identifier suffixes starting with a dash.", action='store_false') + help=prefix3 + "Disallow language identifier suffixes starting with a dash.", + action='store_false') laxgroup= vgroup.add_mutually_exclusive_group() laxgroup.add_argument( prefix1 + "allow-lax-strings", dest=prefix2 + "allow_lax_strings", - help=prefix3 + "Do not check if double quotes are backslashed inside strings.", action='store_true', default=False) + help=prefix3 + "Do not check if double quotes are backslashed inside strings (default=%(default)s).", + action='store_true', default=False) laxgroup.add_argument( prefix1 + "disallow-lax-strings", dest=prefix2 + "allow_lax_strings", - help=prefix3 + "Check if double quotes are backslashed inside strings.", action='store_false') + help=prefix3 + "Check if double quotes are backslashed inside strings.", + action='store_false') lqgroup= vgroup.add_mutually_exclusive_group() lqgroup.add_argument( prefix1 + "allow-lax-lq-strings", dest=prefix2 + "allow_lax_lq_strings", - help=prefix3 + "Do not check if single quotes are backslashed inside language qualified strings.", + help=prefix3 + "Do not check if single quotes are backslashed inside language qualified strings (default=%(default)s).", action='store_true', default=False) lqgroup.add_argument( prefix1 + "disallow-lax-lq-strings", dest=prefix2 + "allow_lax_lq_strings", @@ -104,39 +109,48 @@ def add_arguments(cls, parser: ArgumentParser, who: str = "", desc: str = "."): amd0group= vgroup.add_mutually_exclusive_group() amd0group.add_argument( prefix1 + "allow-month-or-day-zero", dest=prefix2 + 
"allow_month_or_day_zero", - help=prefix3 + "Allow month or day zero in dates.", action='store_true', default=False) + help=prefix3 + "Allow month or day zero in dates (default=%(default)s).", action='store_true', default=False) amd0group.add_argument( prefix1 + "disallow-month-or-day-zero", dest=prefix2 + "allow_month_or_day_zero", - help=prefix3 + "Allow month or day zero in dates.", action='store_false') + help=prefix3 + "Allow month or day zero in dates.", + action='store_false') rmd0group= vgroup.add_mutually_exclusive_group() rmd0group.add_argument( prefix1 + "repair-month-or-day-zero", dest=prefix2 + "repair_month_or_day_zero", - help=prefix3 + "Repair month or day zero in dates.", action='store_true', default=False) + help=prefix3 + "Repair month or day zero in dates (default=%(default)s).", + action='store_true', default=False) rmd0group.add_argument( prefix1 + "no-repair-month-or-day-zero", dest=prefix2 + "repair_month_or_day_zero", help=prefix3 + "Do not repair month or day zero in dates.", action='store_false') vgroup.add_argument( prefix1 + "minimum-valid-year", dest=prefix2 + "minimum_valid_year", - help=prefix3 + "The minimum valid year in dates.", type=int, default=cls.MINIMUM_VALID_YEAR) + help=prefix3 + "The minimum valid year in dates (default=%(default)d).", + type=int, default=cls.MINIMUM_VALID_YEAR) vgroup.add_argument( prefix1 + "maximum-valid-year", dest=prefix2 + "maximum_valid_year", - help=prefix3 + "The maximum valid year in dates.", type=int, default=cls.MAXIMUM_VALID_YEAR) + help=prefix3 + "The maximum valid year in dates (default=%(default)d).", + type=int, default=cls.MAXIMUM_VALID_YEAR) vgroup.add_argument( prefix1 + "minimum-valid-lat", dest=prefix2 + "minimum_valid_lat", - help=prefix3 + "The minimum valid latitude.", type=int, default=cls.MINIMUM_VALID_LAT) + help=prefix3 + "The minimum valid latitude (default=%(default)d).", + type=int, default=cls.MINIMUM_VALID_LAT) vgroup.add_argument( prefix1 + "maximum-valid-lat", dest=prefix2 + "maximum_valid_lat", - help=prefix3 + "The maximum valid latitude.", type=int, default=cls.MAXIMUM_VALID_LAT) + help=prefix3 + "The maximum valid latitude (default=%(default)d).", + type=int, default=cls.MAXIMUM_VALID_LAT) vgroup.add_argument( prefix1 + "minimum-valid-lon", dest=prefix2 + "minimum_valid_lon", - help=prefix3 + "The minimum valid longitude.", type=int, default=cls.MINIMUM_VALID_LON) + help=prefix3 + "The minimum valid longitude (default=%(default)d).", + type=int, default=cls.MINIMUM_VALID_LON) vgroup.add_argument( prefix1 + "maximum-valid-lon", dest=prefix2 + "maximum_valid_lon", - help=prefix3 + "The maximum valid longitude.", type=int, default=cls.MAXIMUM_VALID_LON) + help=prefix3 + "The maximum valid longitude (default=%(default)d).", + type=int, default=cls.MAXIMUM_VALID_LON) elsgroup= vgroup.add_mutually_exclusive_group() elsgroup.add_argument( prefix1 + "escape-list-separators", dest=prefix2 + "escape_list_separators", - help=prefix3 + "Escape all list separators instead of splitting on them.", action='store_true', default=False) + help=prefix3 + "Escape all list separators instead of splitting on them (default=%(default)s).", + action='store_true', default=False) elsgroup.add_argument( prefix1 + "no-escape-list-separators", dest=prefix2 + "escape_list_separators", help=prefix3 + "Do not escape list separators.", action='store_false') From 2f01aae704f843ec33608788b5557005d4ddffae Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Mon, 11 May 2020 13:40:05 -0700 Subject: [PATCH 140/278] SHow 
defaults. Fixed some bugs. --- kgtk/cli/ifexists.py | 10 +-- kgtk/cli/ifnotexists.py | 10 +-- kgtk/cli/validate.py | 2 +- kgtk/io/edgereader.py | 2 +- kgtk/io/kgtkreader.py | 136 +++++++++++++++++++++++++--------------- kgtk/io/nodereader.py | 2 +- kgtk/join/ifexists.py | 4 +- 7 files changed, 95 insertions(+), 71 deletions(-) diff --git a/kgtk/cli/ifexists.py b/kgtk/cli/ifexists.py index 112ff7eda..eb7d12801 100644 --- a/kgtk/cli/ifexists.py +++ b/kgtk/cli/ifexists.py @@ -37,16 +37,10 @@ def add_arguments(parser): parser.add_argument( "--field-separator", dest="field_separator", help="Separator for multifield keys", default=IfExists.FIELD_SEPARATOR_DEFAULT) - KgtkReader.add_file_arguments(parser, mode_options=True, who="input") - KgtkReader.add_file_arguments(parser, mode_options=True, who="filter") + KgtkReaderOptions.add_arguments(parser, mode_options=True, who="input") + KgtkReaderOptions.add_arguments(parser, mode_options=True, who="filter") KgtkValueOptions.add_arguments(parser) - - # Note: Any arguments described by KgtkValueOptions.add_arguments(...) - # need to be included in the arguments to run(...), below. - KgtkValueOptions.add_arguments(parser) - - def run(input_kgtk_file: typing.Optional[Path], filter_kgtk_file: Path, output_kgtk_file: typing.Optional[Path], diff --git a/kgtk/cli/ifnotexists.py b/kgtk/cli/ifnotexists.py index 481f2ccbc..c3789e984 100644 --- a/kgtk/cli/ifnotexists.py +++ b/kgtk/cli/ifnotexists.py @@ -37,16 +37,10 @@ def add_arguments(parser): parser.add_argument( "--field-separator", dest="field_separator", help="Separator for multifield keys", default=IfExists.FIELD_SEPARATOR_DEFAULT) - KgtkReader.add_file_arguments(parser, mode_options=True, who="input") - KgtkReader.add_file_arguments(parser, mode_options=True, who="filter") + KgtkReaderOptions.add_arguments(parser, mode_options=True, who="input") + KgtkReaderOptions.add_arguments(parser, mode_options=True, who="filter") KgtkValueOptions.add_arguments(parser) - - # Note: Any arguments described by KgtkValueOptions.add_arguments(...) - # need to be included in the arguments to run(...), below. - KgtkValueOptions.add_arguments(parser) - - def run(input_kgtk_file: typing.Optional[Path], filter_kgtk_file: Path, output_kgtk_file: typing.Optional[Path], diff --git a/kgtk/cli/validate.py b/kgtk/cli/validate.py index 2fa013bfb..f61db403a 100644 --- a/kgtk/cli/validate.py +++ b/kgtk/cli/validate.py @@ -36,7 +36,7 @@ def add_arguments(parser): help="Process the only the header of the input file.", action="store_true") KgtkReader.add_debug_arguments(parser) - KgtkReaderOptions.add_arguments(parser, mode_options=True) + KgtkReaderOptions.add_arguments(parser, mode_options=True, validate=True) KgtkValueOptions.add_arguments(parser) diff --git a/kgtk/io/edgereader.py b/kgtk/io/edgereader.py index 837085269..835177a1d 100644 --- a/kgtk/io/edgereader.py +++ b/kgtk/io/edgereader.py @@ -111,7 +111,7 @@ def main(): parser = ArgumentParser() parser.add_argument(dest="kgtk_file", help="The KGTK edge file to read", type=Path, nargs="?") KgtkReader.add_debug_arguments(parser) - KgtkReaderOptions.add_arguments(parser) + KgtkReaderOptions.add_arguments(parser, validate=True) KgtkValueOptions.add_arguments(parser) args = parser.parse_args() diff --git a/kgtk/io/kgtkreader.py b/kgtk/io/kgtkreader.py index 9ddd0b33f..153447899 100644 --- a/kgtk/io/kgtkreader.py +++ b/kgtk/io/kgtkreader.py @@ -65,6 +65,10 @@ class KgtkReaderOptions(): # How do we handle errors? 
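The options added in this hunk all follow one attrs recipe: a typed attr.ib with an instance_of validator and an explicit default, on a frozen, slotted class. A minimal self-contained version of the recipe (the class name is illustrative):

    import attr

    @attr.s(slots=True, frozen=True)
    class MiniReaderOptions:
        # Top-level validation controls, defaulting to off.
        repair_and_validate_lines: bool = attr.ib(
            validator=attr.validators.instance_of(bool), default=False)
        repair_and_validate_values: bool = attr.ib(
            validator=attr.validators.instance_of(bool), default=False)

    opts = MiniReaderOptions(repair_and_validate_lines=True)
    assert opts.repair_and_validate_lines and not opts.repair_and_validate_values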
error_limit: int = attr.ib(validator=attr.validators.instance_of(int), default=ERROR_LIMIT_DEFAULT) # >0 ==> limit error reports
 
+ # Top-level validation controls:
+ repair_and_validate_lines: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False)
+ repair_and_validate_values: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False)
+
 # Ignore empty lines, comments, and all whitespace lines, etc.?
 empty_line_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.EXCLUDE)
 comment_line_action: ValidationAction = attr.ib(validator=attr.validators.instance_of(ValidationAction), default=ValidationAction.EXCLUDE)
@@ -97,6 +101,7 @@ class KgtkReaderOptions():
 def add_arguments(cls,
 parser: ArgumentParser,
 mode_options: bool = False,
+ validate: bool = False,
 who: str = ""):
 prefix1: str = "--" if len(who) == 0 else "--" + who + "-"
 prefix2: str = "" if len(who) == 0 else who + "_"
@@ -157,6 +162,26 @@ def add_arguments(cls,
 lgroup: _ArgumentGroup = parser.add_argument_group("Line parsing",
 "Options affecting " + prefix4 + "data line parsing")
 
+ lgroup.add_argument(prefix1 + "repair-and-validate-lines",
+ dest=prefix2 + "repair_and_validate_lines",
+ help=prefix3 + "Repair and validate lines (default=%(default)s).",
+ action='store_true', default=validate)
+
+ lgroup.add_argument(prefix1 + "do-not-repair-and-validate-lines",
+ dest=prefix2 + "repair_and_validate_lines",
+ help=prefix3 + "Do not repair and validate lines.",
+ action='store_false')
+
+ lgroup.add_argument(prefix1 + "repair-and-validate-values",
+ dest=prefix2 + "repair_and_validate_values",
+ help=prefix3 + "Repair and validate values (default=%(default)s).",
+ action='store_true', default=validate)
+
+ lgroup.add_argument(prefix1 + "do-not-repair-and-validate-values",
+ dest=prefix2 + "repair_and_validate_values",
+ help=prefix3 + "Do not repair and validate values.",
+ action='store_false')
+
 lgroup.add_argument(prefix1 + "blank-required-field-line-action",
 dest=prefix2 + "blank_required_field_line_action",
 help=prefix3 + "The action to take when a line with a blank node1, node2, or id field (per mode) is detected (default=%(default)s).",
@@ -234,6 +259,8 @@ def from_dict(cls,
 invalid_value_action=d.get(prefix + "invalid_value_action", ValidationAction.REPORT),
 long_line_action=d.get(prefix + "long_line_action", ValidationAction.EXCLUDE),
 mode=reader_mode,
+ repair_and_validate_lines=d.get(prefix + "repair_and_validate_lines", False),
+ repair_and_validate_values=d.get(prefix + "repair_and_validate_values", False),
 short_line_action=d.get(prefix + "short_line_action", ValidationAction.EXCLUDE),
 skip_first_record=d.get(prefix + "skip_first_record", False),
 truncate_long_lines=d.get(prefix + "truncate_long_lines", False),
@@ -258,7 +285,9 @@ class KgtkReader(KgtkBase, ClosableIter[typing.List[str]]):
 file_path: typing.Optional[Path] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(Path)))
 source: ClosableIter[str] = attr.ib() # Todo: validate
- options: KgtkReaderOptions = attr.ib(validator=attr.validators.instance_of(KgtkReaderOptions))
+ # TODO: Fix this validator:
+ # options: KgtkReaderOptions = attr.ib(validator=attr.validators.instance_of(KgtkReaderOptions))
+ options: KgtkReaderOptions = attr.ib()
 value_options: KgtkValueOptions = attr.ib(validator=attr.validators.instance_of(KgtkValueOptions))
@@ -581,6 +610,9 @@ def exclude_line(self, action: ValidationAction, msg: str, line: str)->bool:
        row: typing.List[str]

+        repair_and_validate_lines: bool = self.options.repair_and_validate_lines
+        repair_and_validate_values: bool = self.options.repair_and_validate_values
+
         # This loop accommodates lines that are ignored.
         while (True):
             line: str
@@ -600,69 +632,73 @@ def nextrow(self)-> typing.List[str]:
             # Strip the end-of-line characters:
             line = line.rstrip("\r\n")

-            if self.very_verbose:
-                print("'%s'" % line, file=self.error_file, flush=True)
+            if repair_and_validate_lines:
+                # TODO: Use a separate option to control this.
+                if self.very_verbose:
+                    print("'%s'" % line, file=self.error_file, flush=True)
+
+                # Ignore empty lines.
+                if self.options.empty_line_action != ValidationAction.PASS and len(line) == 0:
+                    if self.exclude_line(self.options.empty_line_action, "saw an empty line", line):
+                        continue

-            # Ignore empty lines.
-            if self.options.empty_line_action != ValidationAction.PASS and len(line) == 0:
-                if self.exclude_line(self.options.empty_line_action, "saw an empty line", line):
-                    continue
+                # Ignore comment lines:
+                if self.options.comment_line_action != ValidationAction.PASS and line[0] == self.COMMENT_INDICATOR:
+                    if self.exclude_line(self.options.comment_line_action, "saw a comment line", line):
+                        continue

-            # Ignore comment lines:
-            if self.options.comment_line_action != ValidationAction.PASS and line[0] == self.COMMENT_INDICATOR:
-                if self.exclude_line(self.options.comment_line_action, "saw a comment line", line):
-                    continue
-
-            # Ignore whitespace lines
-            if self.options.whitespace_line_action != ValidationAction.PASS and line.isspace():
-                if self.exclude_line(self.options.whitespace_line_action, "saw a whitespace line", line):
-                    continue
+                # Ignore whitespace lines
+                if self.options.whitespace_line_action != ValidationAction.PASS and line.isspace():
+                    if self.exclude_line(self.options.whitespace_line_action, "saw a whitespace line", line):
+                        continue

             row = line.split(self.options.column_separator)

-            # Optionally fill missing trailing columns with empty row:
-            if self.options.fill_short_lines and len(row) < self.column_count:
-                while len(row) < self.column_count:
-                    row.append("")
+            if repair_and_validate_lines:
+                # Optionally fill missing trailing columns with empty row:
+                if self.options.fill_short_lines and len(row) < self.column_count:
+                    while len(row) < self.column_count:
+                        row.append("")

-            # Optionally remove extra trailing columns:
-            if self.options.truncate_long_lines and len(row) > self.column_count:
-                row = row[:self.column_count]
-
-            # Optionally validate that the line contained the right number of columns:
-            #
-            # When we report line numbers in error messages, line 1 is the first line after the header line.
-            if self.options.short_line_action != ValidationAction.PASS and len(row) < self.column_count:
-                if self.exclude_line(self.options.short_line_action,
-                                     "Required %d columns, saw %d: '%s'" % (self.column_count,
-                                                                            len(row),
-                                                                            line),
-                                     line):
-                    continue
+                # Optionally remove extra trailing columns:
+                if self.options.truncate_long_lines and len(row) > self.column_count:
+                    row = row[:self.column_count]
+
+                # Optionally validate that the line contained the right number of columns:
+                #
+                # When we report line numbers in error messages, line 1 is the first line after the header line.
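                # An illustrative example (editorial sketch, with hypothetical
                # values): if column_count == 3 and fill_short_lines is enabled,
                # the short row ["a", "b"] is padded to ["a", "b", ""]; if
                # truncate_long_lines is enabled, ["a", "b", "c", "d"] is cut
                # back to ["a", "b", "c"].  Rows that are still the wrong length
                # are then handled by the short/long line actions below.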
+                if self.options.short_line_action != ValidationAction.PASS and len(row) < self.column_count:
+                    if self.exclude_line(self.options.short_line_action,
+                                         "Required %d columns, saw %d: '%s'" % (self.column_count,
+                                                                                len(row),
+                                                                                line),
+                                         line):
+                        continue

-            if self.options.long_line_action != ValidationAction.PASS and len(row) > self.column_count:
-                if self.exclude_line(self.options.long_line_action,
-                                     "Required %d columns, saw %d (%d extra): '%s'" % (self.column_count,
-                                                                                       len(row),
-                                                                                       len(row) - self.column_count,
-                                                                                       line),
-                                     line):
+                if self.options.long_line_action != ValidationAction.PASS and len(row) > self.column_count:
+                    if self.exclude_line(self.options.long_line_action,
+                                         "Required %d columns, saw %d (%d extra): '%s'" % (self.column_count,
+                                                                                           len(row),
+                                                                                           len(row) - self.column_count,
+                                                                                           line),
+                                         line):
+                        continue
+
+                if self._ignore_if_blank_fields(row, line):
                     continue

-            if self._ignore_if_blank_fields(row, line):
-                continue
-
-            if self.options.invalid_value_action != ValidationAction.PASS:
+            if repair_and_validate_values and self.options.invalid_value_action != ValidationAction.PASS:
                 # TODO: find a way to optionally cache the KgtkValue objects
                 # so we don't have to create them a second time in the conversion
                 # and iterator methods below.
                 if self._ignore_invalid_values(row, line):
                     continue

-            self.data_lines_passed += 1
-            if self.very_verbose:
-                sys.stdout.write(".")
-                sys.stdout.flush()
+            self.data_lines_passed += 1
+            # TODO: Use a separate option to control this.
+            # if self.very_verbose:
+            #     self.error_file.write(".")
+            #     self.error_file.flush()

             return row

@@ -919,7 +955,7 @@ def main():
                         default="rows")
     parser.add_argument( "--test-validate", dest="test_validate", help="Validate KgtkValue objects in test.", action='store_true')

-    KgtkReaderOptions.add_arguments(parser, mode_options=True)
+    KgtkReaderOptions.add_arguments(parser, mode_options=True, validate=True)
     KgtkValueOptions.add_arguments(parser)
     args = parser.parse_args()

diff --git a/kgtk/io/nodereader.py b/kgtk/io/nodereader.py
index 56702a73a..8fee4bd03 100644
--- a/kgtk/io/nodereader.py
+++ b/kgtk/io/nodereader.py
@@ -94,7 +94,7 @@ def main():
     """
     parser = ArgumentParser()
     parser.add_argument(dest="kgtk_file", help="The KGTK edge file to read", type=Path, nargs="?")
-    KgtkReader.add_debug_arguments(parser)
+    KgtkReader.add_debug_arguments(parser, validate=True)
     KgtkReaderOptions.add_arguments(parser)
     KgtkValueOptions.add_arguments(parser)
     args = parser.parse_args()

diff --git a/kgtk/join/ifexists.py b/kgtk/join/ifexists.py
index 1e85e44fa..7ec3878d6 100644
--- a/kgtk/join/ifexists.py
+++ b/kgtk/join/ifexists.py
@@ -217,8 +217,8 @@ def main():
     parser.add_argument( "--input-keys", dest="input_keys", help="The key columns in the input file.", nargs='*')
     parser.add_argument( "--filter-keys", dest="filter_keys", help="The key columns in the filter file.", nargs='*')

-    KgtkReader.add_file_arguments(parser, mode_options=True, who="input")
-    KgtkReader.add_file_arguments(parser, mode_options=True, who="filter")
+    KgtkReaderOptions.add_arguments(parser, mode_options=True, who="input")
+    KgtkReaderOptions.add_arguments(parser, mode_options=True, who="filter")
     KgtkValueOptions.add_arguments(parser)

     args: Namespace = parser.parse_args()

From 401fbde11515a78d09570f7a579f6586d49fd96e Mon Sep 17 00:00:00 2001
From: Craig Milo Rogers
Date: Mon, 11 May 2020 13:44:39 -0700
Subject: [PATCH 141/278] Finish conversion to more consistent names.
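This completes the renaming: kgtk/join/ifexists.py now speaks of an "input"
file and a "filter" file instead of "left" and "right", so attributes such as
left_file_path and output_path become input_file_path and output_file_path,
and the test script switches from --left-keys/--right-keys to --input-keys
and --filter-keys.  A hedged sketch of the renamed interface (the file names
are placeholders and the keyword list is abbreviated, not part of this patch):

    # Hypothetical invocation illustrating the renamed attributes:
    ie = IfExists(input_file_path=Path("input.tsv"),
                  filter_file_path=Path("filter.tsv"),
                  output_file_path=Path("output.tsv"),
                  input_keys=["node1"],
                  filter_keys=["node1"])
    ie.process()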
--- kgtk/join/ifexists.py | 10 +++++----- kgtk/join/test/ifexists-test1-node1.sh | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/kgtk/join/ifexists.py b/kgtk/join/ifexists.py index 7ec3878d6..2242a0935 100644 --- a/kgtk/join/ifexists.py +++ b/kgtk/join/ifexists.py @@ -124,7 +124,7 @@ def extract_key_set(self, kr: KgtkReader, who: str, key_columns: typing.List[int def process(self): # Open the input files once. if self.verbose: - if self.left_file_path is not None: + if self.input_file_path is not None: print("Opening the input file: %s" % self.input_file_path, file=self.error_file, flush=True) else: print("Reading the input data from stdin", file=self.error_file, flush=True) @@ -164,9 +164,9 @@ def process(self): print("Keys: %s" % " ".join(key_set), file=self.error_file, flush=True) if self.verbose: - print("Opening the output file: %s" % self.output_path, file=self.error_file, flush=True) - ew: KgtkWriter = KgtkWriter.open(left_kr.column_names, - self.output_path, + print("Opening the output file: %s" % self.output_file_path, file=self.error_file, flush=True) + ew: KgtkWriter = KgtkWriter.open(input_kr.column_names, + self.output_file_path, require_all_columns=False, prohibit_extra_columns=True, fill_missing_columns=True, @@ -204,7 +204,7 @@ def main(): parser: ArgumentParser = ArgumentParser() KgtkReader.add_debug_arguments(parser) - parser.add_argument(dest="input_file", help="The KGTK file with the input data", type=Path, nargs="?") + parser.add_argument(dest="input_file_path", help="The KGTK file with the input data", type=Path, nargs="?") parser.add_argument( "--filter-on", dest="filter_file_path", help="The KGTK file with the filter data", type=Path, required=True) diff --git a/kgtk/join/test/ifexists-test1-node1.sh b/kgtk/join/test/ifexists-test1-node1.sh index 7ff31e58f..b11a66710 100755 --- a/kgtk/join/test/ifexists-test1-node1.sh +++ b/kgtk/join/test/ifexists-test1-node1.sh @@ -1,7 +1,7 @@ #! /bin/sh python3 kgtk/join/ifexists.py \ kgtk/join/test/ifexists-test1-file1.tsv \ - kgtk/join/test/ifexists-test1-file2.tsv \ - --left-keys node1 \ - --right-keys node1 \ + --input-keys node1 \ + --filter-on kgtk/join/test/ifexists-test1-file2.tsv \ + --filter-keys node1 \ --output-file kgtk/join/test/ifexists-test1-node1-output.tsv From c71fb1fb2916b277ef2f514b9ca416205805dd21 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Mon, 11 May 2020 13:50:48 -0700 Subject: [PATCH 142/278] Use the debug options. --- kgtk/cli/ifexists.py | 3 ++- kgtk/cli/ifnotexists.py | 1 + kgtk/join/ifexists.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/kgtk/cli/ifexists.py b/kgtk/cli/ifexists.py index eb7d12801..1588f1d4e 100644 --- a/kgtk/cli/ifexists.py +++ b/kgtk/cli/ifexists.py @@ -27,7 +27,7 @@ def add_arguments(parser): """ parser.add_argument( "input_kgtk_file", nargs="?", help="The KGTK file to filter. 
May be omitted or '-' for stdin.", type=Path)

-    parser.add_argument( "--filter-on", dest="_filter_kgtk_file", help="The KGTK file to filter against.", type=Path, required=True)
+    parser.add_argument( "--filter-on", dest="filter_kgtk_file", help="The KGTK file to filter against.", type=Path, required=True)

     parser.add_argument("-o", "--output-file", dest="output_kgtk_file", help="The KGTK file to write", type=Path, default=None)

@@ -37,6 +37,7 @@ def add_arguments(parser):
     parser.add_argument( "--field-separator", dest="field_separator", help="Separator for multifield keys", default=IfExists.FIELD_SEPARATOR_DEFAULT)

+    KgtkReader.add_debug_arguments(parser)
     KgtkReaderOptions.add_arguments(parser, mode_options=True, who="input")
     KgtkReaderOptions.add_arguments(parser, mode_options=True, who="filter")
     KgtkValueOptions.add_arguments(parser)

diff --git a/kgtk/cli/ifnotexists.py b/kgtk/cli/ifnotexists.py
index c3789e984..971bdb3d8 100644
--- a/kgtk/cli/ifnotexists.py
+++ b/kgtk/cli/ifnotexists.py
@@ -37,6 +37,7 @@ def add_arguments(parser):
     parser.add_argument( "--field-separator", dest="field_separator", help="Separator for multifield keys", default=IfExists.FIELD_SEPARATOR_DEFAULT)

+    KgtkReader.add_debug_arguments(parser)
     KgtkReaderOptions.add_arguments(parser, mode_options=True, who="input")
     KgtkReaderOptions.add_arguments(parser, mode_options=True, who="filter")
     KgtkValueOptions.add_arguments(parser)

diff --git a/kgtk/join/ifexists.py b/kgtk/join/ifexists.py
index 2242a0935..9e5a788c0 100644
--- a/kgtk/join/ifexists.py
+++ b/kgtk/join/ifexists.py
@@ -202,7 +202,6 @@ def main():
     Test the KGTK file joiner.
     """
     parser: ArgumentParser = ArgumentParser()
-    KgtkReader.add_debug_arguments(parser)

     parser.add_argument(dest="input_file_path", help="The KGTK file with the input data", type=Path, nargs="?")

     parser.add_argument( "--filter-on", dest="filter_file_path", help="The KGTK file with the filter data", type=Path, required=True)

@@ -217,6 +216,7 @@ def main():
     parser.add_argument( "--input-keys", dest="input_keys", help="The key columns in the input file.", nargs='*')
     parser.add_argument( "--filter-keys", dest="filter_keys", help="The key columns in the filter file.", nargs='*')

+    KgtkReader.add_debug_arguments(parser)
     KgtkReaderOptions.add_arguments(parser, mode_options=True, who="input")
     KgtkReaderOptions.add_arguments(parser, mode_options=True, who="filter")
     KgtkValueOptions.add_arguments(parser)

From 155af7bbfc3c5605c95b421b97afb78c1b82463d Mon Sep 17 00:00:00 2001
From: Craig Milo Rogers
Date: Mon, 11 May 2020 13:52:32 -0700
Subject: [PATCH 143/278] Add a missing prefix for a --help feedback message.
--- kgtk/io/kgtkreader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kgtk/io/kgtkreader.py b/kgtk/io/kgtkreader.py index 153447899..2d7e1de45 100644 --- a/kgtk/io/kgtkreader.py +++ b/kgtk/io/kgtkreader.py @@ -160,7 +160,7 @@ def add_arguments(cls, help=prefix3 + "The action to take when a column name is unsafe (default=%(default)s).", type=ValidationAction, action=EnumNameAction, default=ValidationAction.REPORT) - lgroup: _ArgumentGroup = parser.add_argument_group("Line parsing", "Options affecting " + prefix4 + "data line parsing") + lgroup: _ArgumentGroup = parser.add_argument_group(prefix3 + "Line parsing", "Options affecting " + prefix4 + "data line parsing") lgroup.add_argument(prefix1 + "repair-and-validate-lines", dest=prefix2 + "repair_and_validate_lines", From 50b732a092ce951a060803a33a298400e855d720 Mon Sep 17 00:00:00 2001 From: saggu Date: Mon, 11 May 2020 16:33:41 -0700 Subject: [PATCH 144/278] remove unused function --- kgtk/cli/gt_loader.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/kgtk/cli/gt_loader.py b/kgtk/cli/gt_loader.py index 00fa84404..400ea8fab 100644 --- a/kgtk/cli/gt_loader.py +++ b/kgtk/cli/gt_loader.py @@ -9,20 +9,6 @@ def parser(): } -def convert_scientific_notation(num): - if isinstance(num, float): - num = str(num) - if 'e' in num: - vals = num.split('e') - formatter = int(vals[1].replace('-', '')) + 2 - try: - return "{:.{formatter}f}".format(float(num), formatter=formatter) - except: - print(num, vals, formatter) - raise - return num - - def add_arguments(parser): """ Parse arguments @@ -177,7 +163,7 @@ def infer_predicate(h, options=[]): for vprop in G2.vertex_properties.keys(): if vprop == id_col: continue sys.stdout.write( - '%s\t%s\t%s\t%s\n' % (v_id, v_prop_dict[vprop], convert_scientific_notation(G2.vp[vprop][v]), + '%s\t%s\t%s\t%s\n' % (v_id, v_prop_dict[vprop], G2.vp[vprop][v], '{}-{}-{}'.format(v_id, v_prop_dict[vprop], id_count))) id_count += 1 From 0bb56ec96c84bba19770df79f2c4343c360b0b1c Mon Sep 17 00:00:00 2001 From: saggu Date: Mon, 11 May 2020 16:33:59 -0700 Subject: [PATCH 145/278] choose type wisely --- kgtk/triple_generator.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py index 1ed5b5414..2afaff4e9 100644 --- a/kgtk/triple_generator.py +++ b/kgtk/triple_generator.py @@ -19,6 +19,7 @@ ExternalIdentifier, URLValue ) +from etk.knowledge_graph.node import LiteralType BAD_CHARS = [":", "-", "&", ",", " ", "(", ")", "\'", '\"', "/", "\\", "[", "]", ";", "|"] @@ -199,6 +200,12 @@ def generate_prop_declaration_triple(self, node1: str, label: str, node2: str) - self.doc.kg.add_subject(prop) return True + @staticmethod + def xsd_number_type(num): + if isinstance(num, float) and 'e' in str(num).lower(): + return LiteralType.double + return LiteralType.decimal + def generate_normal_triple( self, node1: str, label: str, node2: str, is_qualifier_edge: bool, e_id: str) -> bool: if self.use_id: @@ -268,20 +275,22 @@ def generate_normal_triple( amount, lower_bound, upper_bound, unit = res amount = TripleGenerator.clean_number_string(amount) + num_type = self.xsd_number_type(amount) + print(amount, num_type) lower_bound = TripleGenerator.clean_number_string(lower_bound) upper_bound = TripleGenerator.clean_number_string(upper_bound) if unit != None: if upper_bound != None and lower_bound != None: object = QuantityValue(amount, unit=Item( - unit), upper_bound=upper_bound, lower_bound=lower_bound) + unit), 
upper_bound=upper_bound, lower_bound=lower_bound, type=num_type) else: - object = QuantityValue(amount, unit=Item(unit)) + object = QuantityValue(amount, unit=Item(unit), type=num_type) else: if upper_bound != None and lower_bound != None: object = QuantityValue( - amount, upper_bound=upper_bound, lower_bound=lower_bound) + amount, upper_bound=upper_bound, lower_bound=lower_bound, type=num_type) else: - object = QuantityValue(amount) + object = QuantityValue(amount, type=num_type) elif edge_type == MonolingualText: text_string, lang = TripleGenerator.process_text_string(node2) From 1a4b38cc209165845bb4d77ae43bf395c3361c7c Mon Sep 17 00:00:00 2001 From: saggu Date: Mon, 11 May 2020 16:34:25 -0700 Subject: [PATCH 146/278] remove strict version for etk and rdflib --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 27e52a2c2..54ba2f65e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,8 +8,8 @@ sh sklearn SPARQLWrapper tqdm -rdflib==5.0.0 -etk==2.2.1 +rdflib +etk simplejson pyrallel.lib attrs From 6173cbaac5a3a1298e2defa80ae1dd3fa4aa50b1 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Mon, 11 May 2020 17:07:47 -0700 Subject: [PATCH 147/278] Implement expert mode. --- kgtk/cli/clean_data.py | 12 ++-- kgtk/cli/ifexists.py | 29 ++++++--- kgtk/cli/ifnotexists.py | 26 ++++++-- kgtk/cli/validate.py | 12 ++-- kgtk/cli_entry.py | 6 +- kgtk/io/edgereader.py | 6 +- kgtk/io/kgtkreader.py | 106 +++++++++++++++++++++------------ kgtk/io/nodereader.py | 6 +- kgtk/value/kgtkvalueoptions.py | 68 +++++++++++++-------- 9 files changed, 180 insertions(+), 91 deletions(-) diff --git a/kgtk/cli/clean_data.py b/kgtk/cli/clean_data.py index e52a4f518..bb497eea4 100644 --- a/kgtk/cli/clean_data.py +++ b/kgtk/cli/clean_data.py @@ -6,10 +6,12 @@ """ +from argparse import Namespace, SUPPRESS from pathlib import Path import sys import typing +from kgtk.cli_argparse import KGTKArgumentParser from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions from kgtk.io.kgtkwriter import KgtkWriter from kgtk.value.kgtkvalueoptions import KgtkValueOptions @@ -20,18 +22,20 @@ def parser(): } -def add_arguments(parser): +def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace): """ Parse arguments Args: parser (argparse.ArgumentParser) """ + _expert: bool = parsed_shared_args._expert + parser.add_argument( "input_file", nargs="?", help="The KGTK file to read. May be omitted or '-' for stdin.", type=Path) parser.add_argument( "output_file", nargs="?", help="The KGTK file to write. 
May be omitted or '-' for stdout.", type=Path) - KgtkReader.add_debug_arguments(parser) - KgtkReaderOptions.add_arguments(parser, mode_options=True) - KgtkValueOptions.add_arguments(parser) + KgtkReader.add_debug_arguments(parser, expert=_expert) + KgtkReaderOptions.add_arguments(parser, mode_options=True, validate_by_default=True, expert=True) + KgtkValueOptions.add_arguments(parser, expert=True) def run(input_file: typing.Optional[Path], diff --git a/kgtk/cli/ifexists.py b/kgtk/cli/ifexists.py index 1588f1d4e..9e62ec60e 100644 --- a/kgtk/cli/ifexists.py +++ b/kgtk/cli/ifexists.py @@ -4,10 +4,12 @@ TODO: Need KgtkWriterOptions """ +from argparse import Namespace, SUPPRESS from pathlib import Path import sys import typing +from kgtk.cli_argparse import KGTKArgumentParser from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions from kgtk.io.kgtkwriter import KgtkWriter from kgtk.join.ifexists import IfExists @@ -15,16 +17,29 @@ def parser(): return { - 'help': 'Filter a KGTK file based on whether one or more records exist in a second KGTK file with matching values for one or more fields.' + 'help': 'Filter a KGTK file', + 'description': 'Filter a KGTK file based on whether one or more records exist in a second KGTK file with matching values for one or more fields.' } -def add_arguments(parser): +def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace): """ Parse arguments Args: parser (argparse.ArgumentParser) """ + + _expert: bool = parsed_shared_args._expert + + # This helper function makes it easy to suppress options from + # The help message. The options are still there, and initialize + # what they need to initialize. + def h(msg: str)->str: + if _expert: + return msg + else: + return SUPPRESS + parser.add_argument( "input_kgtk_file", nargs="?", help="The KGTK file to filter. 
May be omitted or '-' for stdin.", type=Path) parser.add_argument( "--filter-on", dest="filter_kgtk_file", help="The KGTK file to filter against.", type=Path, required=True) @@ -35,12 +50,12 @@ def add_arguments(parser): parser.add_argument( "--filter-keys", "--right-keys", dest="filter_keys", help="The key columns in the filter-on file.", nargs='*') - parser.add_argument( "--field-separator", dest="field_separator", help="Separator for multifield keys", default=IfExists.FIELD_SEPARATOR_DEFAULT) + parser.add_argument( "--field-separator", dest="field_separator", help=h("Separator for multifield keys"), default=IfExists.FIELD_SEPARATOR_DEFAULT) - KgtkReader.add_debug_arguments(parser) - KgtkReaderOptions.add_arguments(parser, mode_options=True, who="input") - KgtkReaderOptions.add_arguments(parser, mode_options=True, who="filter") - KgtkValueOptions.add_arguments(parser) + KgtkReader.add_debug_arguments(parser, expert=_expert) + KgtkReaderOptions.add_arguments(parser, mode_options=True, who="input", expert=_expert) + KgtkReaderOptions.add_arguments(parser, mode_options=True, who="filter", expert=_expert) + KgtkValueOptions.add_arguments(parser, expert=_expert) def run(input_kgtk_file: typing.Optional[Path], filter_kgtk_file: Path, diff --git a/kgtk/cli/ifnotexists.py b/kgtk/cli/ifnotexists.py index 971bdb3d8..964f77dc0 100644 --- a/kgtk/cli/ifnotexists.py +++ b/kgtk/cli/ifnotexists.py @@ -4,10 +4,12 @@ TODO: Need KgtkWriterOptions """ +from argparse import Namespace, SUPPRESS from pathlib import Path import sys import typing +from kgtk.cli_argparse import KGTKArgumentParser from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions from kgtk.io.kgtkwriter import KgtkWriter from kgtk.join.ifexists import IfExists @@ -19,12 +21,24 @@ def parser(): } -def add_arguments(parser): +def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace): """ Parse arguments Args: parser (argparse.ArgumentParser) """ + _expert: bool = parsed_shared_args._expert + + # This helper function makes it easy to suppress options from + # The help message. The options are still there, and initialize + # what they need to initialize. + def h(msg: str)->str: + if not _expert: + return SUPPRESS + else: + return msg + + parser.add_argument( "input_kgtk_file", nargs="?", help="The KGTK file to filter. 
May be omitted or '-' for stdin.", type=Path) parser.add_argument( "--filter-on", dest="_filter_kgtk_file", help="The KGTK file to filter against.", type=Path, required=True) @@ -35,12 +49,12 @@ def add_arguments(parser): parser.add_argument( "--filter-keys", "--right-keys", dest="filter_keys", help="The key columns in the filter-on file.", nargs='*') - parser.add_argument( "--field-separator", dest="field_separator", help="Separator for multifield keys", default=IfExists.FIELD_SEPARATOR_DEFAULT) + parser.add_argument( "--field-separator", dest="field_separator", help=h("Separator for multifield keys"), default=IfExists.FIELD_SEPARATOR_DEFAULT) - KgtkReader.add_debug_arguments(parser) - KgtkReaderOptions.add_arguments(parser, mode_options=True, who="input") - KgtkReaderOptions.add_arguments(parser, mode_options=True, who="filter") - KgtkValueOptions.add_arguments(parser) + KgtkReader.add_debug_arguments(parser, expert=_expert) + KgtkReaderOptions.add_arguments(parser, mode_options=True, who="input", expert=_expert) + KgtkReaderOptions.add_arguments(parser, mode_options=True, who="filter", expert=_expert) + KgtkValueOptions.add_arguments(parser, expert=_expert) def run(input_kgtk_file: typing.Optional[Path], filter_kgtk_file: Path, diff --git a/kgtk/cli/validate.py b/kgtk/cli/validate.py index f61db403a..9e16bd90d 100644 --- a/kgtk/cli/validate.py +++ b/kgtk/cli/validate.py @@ -11,10 +11,12 @@ This program does not validate individual fields. """ +from argparse import Namespace from pathlib import Path import sys import typing +from kgtk.cli_argparse import KGTKArgumentParser from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions from kgtk.value.kgtkvalueoptions import KgtkValueOptions @@ -24,20 +26,22 @@ def parser(): } -def add_arguments(parser): +def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace): """ Parse arguments Args: parser (argparse.ArgumentParser) """ + _expert: bool = parsed_shared_args._expert + parser.add_argument( "kgtk_files", nargs="*", help="The KGTK file(s) to validate. 
May be omitted or '-' for stdin.", type=Path) parser.add_argument( "--header-only", dest="header_only", help="Process the only the header of the input file.", action="store_true") - KgtkReader.add_debug_arguments(parser) - KgtkReaderOptions.add_arguments(parser, mode_options=True, validate=True) - KgtkValueOptions.add_arguments(parser) + KgtkReader.add_debug_arguments(parser, expert=_expert) + KgtkReaderOptions.add_arguments(parser, mode_options=True, validate_by_default=True, expert=True) + KgtkValueOptions.add_arguments(parser, expert=True) def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], diff --git a/kgtk/cli_entry.py b/kgtk/cli_entry.py index 5baa9976b..007bca01f 100644 --- a/kgtk/cli_entry.py +++ b/kgtk/cli_entry.py @@ -49,6 +49,7 @@ def cli_entry(*args): ) shared_args = base_parser.add_argument_group('shared optional arguments') shared_args.add_argument('--debug', dest='_debug', action='store_true', default=False, help='enable debug mode') + shared_args.add_argument('--expert', dest='_expert', action='store_true', default=False, help='enable expert mode') add_shared_arguments(shared_args) # parse shared arguments @@ -70,7 +71,10 @@ def cli_entry(*args): mod = importlib.import_module('.{}'.format(h), 'kgtk.cli') sub_parser = sub_parsers.add_parser(h, **mod.parser()) add_default_arguments(sub_parser) # call this before adding other arguments - mod.add_arguments(sub_parser) + if hasattr(mod, "add_arguments_extended"): + mod.add_arguments_extended(sub_parser, parsed_shared_args) + else: + mod.add_arguments(sub_parser) # add root level usage after sub-parsers are created # this won't pollute help info in sub-parsers diff --git a/kgtk/io/edgereader.py b/kgtk/io/edgereader.py index 835177a1d..c1234ea78 100644 --- a/kgtk/io/edgereader.py +++ b/kgtk/io/edgereader.py @@ -110,9 +110,9 @@ def main(): """ parser = ArgumentParser() parser.add_argument(dest="kgtk_file", help="The KGTK edge file to read", type=Path, nargs="?") - KgtkReader.add_debug_arguments(parser) - KgtkReaderOptions.add_arguments(parser, validate=True) - KgtkValueOptions.add_arguments(parser) + KgtkReader.add_debug_arguments(parser, expert=True) + KgtkReaderOptions.add_arguments(parser, validate_by_default=True, expert=True) + KgtkValueOptions.add_arguments(parser, expert=True) args = parser.parse_args() error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr diff --git a/kgtk/io/kgtkreader.py b/kgtk/io/kgtkreader.py index 2d7e1de45..80518a5e8 100644 --- a/kgtk/io/kgtkreader.py +++ b/kgtk/io/kgtkreader.py @@ -16,7 +16,7 @@ """ -from argparse import ArgumentParser, _ArgumentGroup, Namespace +from argparse import ArgumentParser, _ArgumentGroup, Namespace, SUPPRESS import attr import bz2 from enum import Enum @@ -101,130 +101,145 @@ class KgtkReaderOptions(): def add_arguments(cls, parser: ArgumentParser, mode_options: bool = False, - validate: bool = False, + validate_by_default: bool = False, + expert: bool = False, who: str = ""): + + # This helper function makes it easy to suppress options from + # The help message. The options are still there, and initialize + # what they need to initialize. 
+ def h(msg: str)->str: + if expert: + return msg + else: + return SUPPRESS + prefix1: str = "--" if len(who) == 0 else "--" + who + "-" prefix2: str = "" if len(who) == 0 else who + "_" prefix3: str = "" if len(who) == 0 else who + ": " prefix4: str = "" if len(who) == 0 else who + " file " - fgroup: _ArgumentGroup = parser.add_argument_group(prefix3 + "File options", - "Options affecting " + prefix4 + "processing") + fgroup: _ArgumentGroup = parser.add_argument_group(h(prefix3 + "File options"), + h("Options affecting " + prefix4 + "processing")) fgroup.add_argument(prefix1 + "column-separator", dest=prefix2 + "column_separator", - help=prefix3 + "Column separator (default=).", # TODO: provide the default with escapes, e.g. \t + help=h(prefix3 + "Column separator (default=)."), # TODO: provide the default with escapes, e.g. \t type=str, default=KgtkFormat.COLUMN_SEPARATOR) fgroup.add_argument(prefix1 + "compression-type", dest=prefix2 + "compression_type", - help=prefix3 + "Specify the compression type (default=%(default)s).") + help=h(prefix3 + "Specify the compression type (default=%(default)s).")) fgroup.add_argument(prefix1 + "error-limit", dest=prefix2 + "error_limit", - help=prefix3 + "The maximum number of errors to report before failing (default=%(default)s)", + help=h(prefix3 + "The maximum number of errors to report before failing (default=%(default)s)"), type=int, default=cls.ERROR_LIMIT_DEFAULT) fgroup.add_argument(prefix1 + "gzip-in-parallel", dest=prefix2 + "gzip_in_parallel", - help=prefix3 + "Execute gzip in parallel (default=%(default)s).", action='store_true') + help=h(prefix3 + "Execute gzip in parallel (default=%(default)s)."), + action='store_true') fgroup.add_argument(prefix1 + "gzip-queue-size", dest=prefix2 + "gzip_queue_size", - help=prefix3 + "Queue size for parallel gzip (default=%(default)s).", + help=h(prefix3 + "Queue size for parallel gzip (default=%(default)s)."), type=int, default=cls.GZIP_QUEUE_SIZE_DEFAULT) if mode_options: fgroup.add_argument(prefix1 + "mode", dest=prefix2 + "mode", - help=prefix3 + "Determine the KGTK file mode (default=%(default)s).", + help=h(prefix3 + "Determine the KGTK file mode (default=%(default)s)."), type=KgtkReaderMode, action=EnumNameAction, default=KgtkReaderMode.AUTO) - hgroup: _ArgumentGroup = parser.add_argument_group(prefix3 + "Header parsing", "Options affecting " + prefix4 + "header parsing") + hgroup: _ArgumentGroup = parser.add_argument_group(h(prefix3 + "Header parsing"), + h("Options affecting " + prefix4 + "header parsing")) hgroup.add_argument(prefix1 + "force-column-names", dest=prefix2 + "force_column_names", - help=prefix3 + "Force the column names (default=None).", + help=h(prefix3 + "Force the column names (default=None)."), nargs='+') hgroup.add_argument(prefix1 + "header-error-action", dest=prefix2 + "header_error_action", - help=prefix3 + "The action to take when a header error is detected. Only ERROR or EXIT are supported (default=%(default)s).", + help=h(prefix3 + "The action to take when a header error is detected. 
Only ERROR or EXIT are supported (default=%(default)s)."), type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXIT) hgroup.add_argument(prefix1 + "skip-first-record", dest=prefix2 + "skip_first_record", - help=prefix3 + "Skip the first record when forcing column names (default=%(default)s).", action='store_true') + help=h(prefix3 + "Skip the first record when forcing column names (default=%(default)s)."), + action='store_true') hgroup.add_argument(prefix1 + "unsafe-column-name-action", dest=prefix2 + "unsafe_column_name_action", - help=prefix3 + "The action to take when a column name is unsafe (default=%(default)s).", + help=h(prefix3 + "The action to take when a column name is unsafe (default=%(default)s)."), type=ValidationAction, action=EnumNameAction, default=ValidationAction.REPORT) - lgroup: _ArgumentGroup = parser.add_argument_group(prefix3 + "Line parsing", "Options affecting " + prefix4 + "data line parsing") + lgroup: _ArgumentGroup = parser.add_argument_group(h(prefix3 + "Line parsing"), + h("Options affecting " + prefix4 + "data line parsing")) lgroup.add_argument(prefix1 + "repair-and-validate-lines", dest=prefix2 + "repair_and_validate_lines", - help=prefix3 + "Repair and validate lines (default=%(default)s).", - action='store_true', default=validate) + help=h(prefix3 + "Repair and validate lines (default=%(default)s)."), + action='store_true', default=validate_by_default) lgroup.add_argument(prefix1 + "do-not-repair-and-validate-lines", dest=prefix2 + "repair_and_validate_lines", - help=prefix3 + "Do not repair and validate lines.", + help=h(prefix3 + "Do not repair and validate lines."), action='store_false') lgroup.add_argument(prefix1 + "repair-and-validate-values", dest=prefix2 + "repair_and_validate_values", - help=prefix3 + "Repair and validate values (default=%(default)s).", - action='store_true', default=validate) + help=h(prefix3 + "Repair and validate values (default=%(default)s)."), + action='store_true', default=validate_by_default) lgroup.add_argument(prefix1 + "do-not-repair-and-validate-values", dest=prefix2 + "repair-and-validate_values", - help=prefix3 + "Do not repair and validate values.", + help=h(prefix3 + "Do not repair and validate values."), action='store_false') lgroup.add_argument(prefix1 + "blank-required-field-line-action", dest=prefix2 + "blank_required_field_line_action", - help=prefix3 + "The action to take when a line with a blank node1, node2, or id field (per mode) is detected (default=%(default)s).", + help=h(prefix3 + "The action to take when a line with a blank node1, node2, or id field (per mode) is detected (default=%(default)s)."), type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) lgroup.add_argument(prefix1 + "comment-line-action", dest=prefix2 + "comment_line_action", - help=prefix3 + "The action to take when a comment line is detected (default=%(default)s).", + help=h(prefix3 + "The action to take when a comment line is detected (default=%(default)s)."), type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) lgroup.add_argument(prefix1 + "empty-line-action", dest=prefix2 + "empty_line_action", - help=prefix3 + "The action to take when an empty line is detected (default=%(default)s).", + help=h(prefix3 + "The action to take when an empty line is detected (default=%(default)s)."), type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) lgroup.add_argument(prefix1 + "fill-short-lines", dest=prefix2 + "fill_short_lines", - help=prefix3 + 
"Fill missing trailing columns in short lines with empty values (default=%(default)s).", + help=h(prefix3 + "Fill missing trailing columns in short lines with empty values (default=%(default)s)."), action='store_true') lgroup.add_argument(prefix1 + "invalid-value-action", dest=prefix2 + "invalid_value_action", - help=prefix3 + "The action to take when a data cell value is invalid (default=%(default)s).", + help=h(prefix3 + "The action to take when a data cell value is invalid (default=%(default)s)."), type=ValidationAction, action=EnumNameAction, default=ValidationAction.REPORT) lgroup.add_argument(prefix1 + "long-line-action", dest=prefix2 + "long_line_action", - help=prefix3 + "The action to take when a long line is detected (default=%(default)s).", + help=h(prefix3 + "The action to take when a long line is detected (default=%(default)s)."), type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) lgroup.add_argument(prefix1 + "short-line-action", dest=prefix2 + "short_line_action", - help=prefix3 + "The action to take when a short line is detected (default=%(default)s).", + help=h(prefix3 + "The action to take when a short line is detected (default=%(default)s)."), type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) lgroup.add_argument(prefix1 + "truncate-long-lines", dest=prefix2 + "truncate_long_lines", - help=prefix3 + "Remove excess trailing columns in long lines (default=%(default)s).", + help=h(prefix3 + "Remove excess trailing columns in long lines (default=%(default)s)."), action='store_true') lgroup.add_argument(prefix1 + "whitespace-line-action", dest=prefix2 + "whitespace_line_action", - help=prefix3 + "The action to take when a whitespace line is detected (default=%(default)s).", + help=h(prefix3 + "The action to take when a whitespace line is detected (default=%(default)s)."), type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) @classmethod @@ -924,16 +939,31 @@ def merge_columns(self, additional_columns: typing.List[str])->typing.List[str]: return merged_columns @classmethod - def add_debug_arguments(cls, parser: ArgumentParser): + def add_debug_arguments(cls, parser: ArgumentParser, expert: bool = False): + # This helper function makes it easy to suppress options from + # The help message. The options are still there, and initialize + # what they need to initialize. + def h(msg: str)->str: + if expert: + return msg + else: + return SUPPRESS + + # TODO: Fix the argparse bug that prevents these two arguments from + # having their help messages suppressed. 
errors_to = parser.add_mutually_exclusive_group() errors_to.add_argument( "--errors-to-stdout", dest="errors_to_stdout", - help="Send errors to stdout instead of stderr", action="store_true") + help="Send errors to stdout instead of stderr", + action="store_true") errors_to.add_argument( "--errors-to-stderr", dest="errors_to_stderr", - help="Send errors to stderr instead of stdout", action="store_true") + help="Send errors to stderr instead of stdout", + action="store_true") parser.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true') - parser.add_argument( "--very-verbose", dest="very_verbose", help="Print additional progress messages.", action='store_true') + parser.add_argument( "--very-verbose", dest="very_verbose", + help=h("Print additional progress messages."), + action='store_true') def main(): """ @@ -946,7 +976,7 @@ def main(): parser = ArgumentParser() parser.add_argument(dest="kgtk_file", help="The KGTK file to read", type=Path, nargs="?") - KgtkReader.add_debug_arguments(parser) + KgtkReader.add_debug_arguments(parser, expert=True) parser.add_argument( "--test", dest="test_method", help="The test to perform", choices=["rows", "concise-rows", "kgtk-values", "concise-kgtk-values", @@ -955,8 +985,8 @@ def main(): default="rows") parser.add_argument( "--test-validate", dest="test_validate", help="Validate KgtkValue objects in test.", action='store_true') - KgtkReaderOptions.add_arguments(parser, mode_options=True, validate=True) - KgtkValueOptions.add_arguments(parser) + KgtkReaderOptions.add_arguments(parser, mode_options=True, validate_by_default=True, expert=True) + KgtkValueOptions.add_arguments(parser, expert=True) args = parser.parse_args() error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr diff --git a/kgtk/io/nodereader.py b/kgtk/io/nodereader.py index 8fee4bd03..4d73ca3a6 100644 --- a/kgtk/io/nodereader.py +++ b/kgtk/io/nodereader.py @@ -94,9 +94,9 @@ def main(): """ parser = ArgumentParser() parser.add_argument(dest="kgtk_file", help="The KGTK edge file to read", type=Path, nargs="?") - KgtkReader.add_debug_arguments(parser, validate=True) - KgtkReaderOptions.add_arguments(parser) - KgtkValueOptions.add_arguments(parser) + KgtkReader.add_debug_arguments(parser, expert=True) + KgtkReaderOptions.add_arguments(parser, validate_by_default=True, expert=True) + KgtkValueOptions.add_arguments(parser, expert=True) args = parser.parse_args() error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr diff --git a/kgtk/value/kgtkvalueoptions.py b/kgtk/value/kgtkvalueoptions.py index b54046b29..5cb8e7526 100644 --- a/kgtk/value/kgtkvalueoptions.py +++ b/kgtk/value/kgtkvalueoptions.py @@ -2,7 +2,7 @@ KGTK value processing options. """ -from argparse import ArgumentParser, Namespace +from argparse import ArgumentParser, Namespace, SUPPRESS import attr import typing @@ -59,7 +59,12 @@ class KgtkValueOptions: @classmethod - def add_arguments(cls, parser: ArgumentParser, who: str = "", desc: str = "."): + def add_arguments(cls, + parser: ArgumentParser, + who: str = "", + desc: str = ".", + expert: bool = False, + ): """Add arguments for KgtkValue option processing. 
When "who" is not empty, it prefixes the options, destinations, and @@ -75,85 +80,98 @@ def add_arguments(cls, parser: ArgumentParser, who: str = "", desc: str = "."): prefix2 = who + "_" prefix3 = who + ": " - vgroup = parser.add_argument_group(prefix3 + "Data value parsing", "Options controlling the parsing and processing of KGTK data values" + desc) + # This helper function makes it easy to suppress options from + # The help message. The options are still there, and initialize + # what they need to initialize. + def h(msg: str)->str: + if expert: + return msg + else: + return SUPPRESS + + vgroup = parser.add_argument_group(h(prefix3 + "Data value parsing"), + h("Options controlling the parsing and processing of KGTK data values" + desc)) vgroup.add_argument( prefix1 + "additional-language-codes", dest=prefix2 + "additional_language_codes", - help=prefix3 + "Additional language codes (default=None).", + help=h(prefix3 + "Additional language codes (default=None)."), nargs="*", default=None) lsgroup= vgroup.add_mutually_exclusive_group() lsgroup.add_argument( prefix1 + "allow-language-suffixes", dest=prefix2 + "allow_language_suffixes", - help=prefix3 + "Allow language identifier suffixes starting with a dash (default=%(default)s).", + help=h(prefix3 + "Allow language identifier suffixes starting with a dash (default=%(default)s)."), action='store_true', default=True) lsgroup.add_argument( prefix1 + "disallow-language-suffixes", dest=prefix2 + "allow_language_suffixes", - help=prefix3 + "Disallow language identifier suffixes starting with a dash.", + help=h(prefix3 + "Disallow language identifier suffixes starting with a dash."), action='store_false') laxgroup= vgroup.add_mutually_exclusive_group() laxgroup.add_argument( prefix1 + "allow-lax-strings", dest=prefix2 + "allow_lax_strings", - help=prefix3 + "Do not check if double quotes are backslashed inside strings (default=%(default)s).", + help=h(prefix3 + "Do not check if double quotes are backslashed inside strings (default=%(default)s)."), action='store_true', default=False) laxgroup.add_argument( prefix1 + "disallow-lax-strings", dest=prefix2 + "allow_lax_strings", - help=prefix3 + "Check if double quotes are backslashed inside strings.", + help=h(prefix3 + "Check if double quotes are backslashed inside strings."), action='store_false') lqgroup= vgroup.add_mutually_exclusive_group() lqgroup.add_argument( prefix1 + "allow-lax-lq-strings", dest=prefix2 + "allow_lax_lq_strings", - help=prefix3 + "Do not check if single quotes are backslashed inside language qualified strings (default=%(default)s).", + help=h(prefix3 + "Do not check if single quotes are backslashed inside language qualified strings (default=%(default)s)."), action='store_true', default=False) lqgroup.add_argument( prefix1 + "disallow-lax-lq-strings", dest=prefix2 + "allow_lax_lq_strings", - help=prefix3 + "Check if single quotes are backslashed inside language qualified strings.", + help=h(prefix3 + "Check if single quotes are backslashed inside language qualified strings."), action='store_false') amd0group= vgroup.add_mutually_exclusive_group() amd0group.add_argument( prefix1 + "allow-month-or-day-zero", dest=prefix2 + "allow_month_or_day_zero", - help=prefix3 + "Allow month or day zero in dates (default=%(default)s).", action='store_true', default=False) + help=h(prefix3 + "Allow month or day zero in dates (default=%(default)s)."), + action='store_true', default=False) amd0group.add_argument( prefix1 + "disallow-month-or-day-zero", dest=prefix2 + 
"allow_month_or_day_zero", - help=prefix3 + "Allow month or day zero in dates.", + help=h(prefix3 + "Allow month or day zero in dates."), action='store_false') rmd0group= vgroup.add_mutually_exclusive_group() rmd0group.add_argument( prefix1 + "repair-month-or-day-zero", dest=prefix2 + "repair_month_or_day_zero", - help=prefix3 + "Repair month or day zero in dates (default=%(default)s).", + help=h(prefix3 + "Repair month or day zero in dates (default=%(default)s)."), action='store_true', default=False) rmd0group.add_argument( prefix1 + "no-repair-month-or-day-zero", dest=prefix2 + "repair_month_or_day_zero", - help=prefix3 + "Do not repair month or day zero in dates.", action='store_false') + help=h(prefix3 + "Do not repair month or day zero in dates."), + action='store_false') vgroup.add_argument( prefix1 + "minimum-valid-year", dest=prefix2 + "minimum_valid_year", - help=prefix3 + "The minimum valid year in dates (default=%(default)d).", + help=h(prefix3 + "The minimum valid year in dates (default=%(default)d)."), type=int, default=cls.MINIMUM_VALID_YEAR) vgroup.add_argument( prefix1 + "maximum-valid-year", dest=prefix2 + "maximum_valid_year", - help=prefix3 + "The maximum valid year in dates (default=%(default)d).", + help=h(prefix3 + "The maximum valid year in dates (default=%(default)d)."), type=int, default=cls.MAXIMUM_VALID_YEAR) vgroup.add_argument( prefix1 + "minimum-valid-lat", dest=prefix2 + "minimum_valid_lat", - help=prefix3 + "The minimum valid latitude (default=%(default)d).", + help=h(prefix3 + "The minimum valid latitude (default=%(default)d)."), type=int, default=cls.MINIMUM_VALID_LAT) vgroup.add_argument( prefix1 + "maximum-valid-lat", dest=prefix2 + "maximum_valid_lat", - help=prefix3 + "The maximum valid latitude (default=%(default)d).", + help=h(prefix3 + "The maximum valid latitude (default=%(default)d)."), type=int, default=cls.MAXIMUM_VALID_LAT) vgroup.add_argument( prefix1 + "minimum-valid-lon", dest=prefix2 + "minimum_valid_lon", - help=prefix3 + "The minimum valid longitude (default=%(default)d).", + help=h(prefix3 + "The minimum valid longitude (default=%(default)d)."), type=int, default=cls.MINIMUM_VALID_LON) vgroup.add_argument( prefix1 + "maximum-valid-lon", dest=prefix2 + "maximum_valid_lon", - help=prefix3 + "The maximum valid longitude (default=%(default)d).", + help=h(prefix3 + "The maximum valid longitude (default=%(default)d)."), type=int, default=cls.MAXIMUM_VALID_LON) elsgroup= vgroup.add_mutually_exclusive_group() elsgroup.add_argument( prefix1 + "escape-list-separators", dest=prefix2 + "escape_list_separators", - help=prefix3 + "Escape all list separators instead of splitting on them (default=%(default)s).", + help=h(prefix3 + "Escape all list separators instead of splitting on them (default=%(default)s)."), action='store_true', default=False) elsgroup.add_argument( prefix1 + "no-escape-list-separators", dest=prefix2 + "escape_list_separators", - help=prefix3 + "Do not escape list separators.", action='store_false') + help=h(prefix3 + "Do not escape list separators."), + action='store_false') @classmethod # Build the value parsing option structure. @@ -184,9 +202,9 @@ def main(): Test the KGTK value options. 
""" parser: ArgumentParser = ArgumentParser() - KgtkValueOptions.add_arguments(parser) - KgtkValueOptions.add_arguments(parser, who="left", desc=" for the left file.") - KgtkValueOptions.add_arguments(parser, who="right", desc=" for the right file.") + KgtkValueOptions.add_arguments(parser, expert=True) + KgtkValueOptions.add_arguments(parser, who="left", desc=" for the left file.", expert=True) + KgtkValueOptions.add_arguments(parser, who="right", desc=" for the right file.", expert=True) args: Namespace = parser.parse_args() # Build the value parsing option structure. From 41b8544cdc35d2e6790bd9fd8f69b1e4e73de31b Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Mon, 11 May 2020 17:36:08 -0700 Subject: [PATCH 148/278] Implement lookup fallbacks to assist with maintaining APi compatability. --- kgtk/cli/ifexists.py | 19 ++++++++----- kgtk/cli/ifnotexists.py | 13 ++++++--- kgtk/io/kgtkreader.py | 60 ++++++++++++++++++++++++----------------- 3 files changed, 59 insertions(+), 33 deletions(-) diff --git a/kgtk/cli/ifexists.py b/kgtk/cli/ifexists.py index 9e62ec60e..69718fbc7 100644 --- a/kgtk/cli/ifexists.py +++ b/kgtk/cli/ifexists.py @@ -42,15 +42,22 @@ def h(msg: str)->str: parser.add_argument( "input_kgtk_file", nargs="?", help="The KGTK file to filter. May be omitted or '-' for stdin.", type=Path) + parser.add_argument( "--input-keys", "--left-keys", dest="input_keys", help="The key columns in the file being filtered.", nargs='*') + parser.add_argument( "--filter-on", dest="filter_kgtk_file", help="The KGTK file to filter against.", type=Path, required=True) - parser.add_argument("-o", "--output-file", dest="output_kgtk_file", help="The KGTK file to write", type=Path, default=None) + parser.add_argument( "--filter-keys", "--right-keys", dest="filter_keys", help="The key columns in the filter-on file.", nargs='*') - parser.add_argument( "--input-keys", "--left-keys", dest="input_keys", help="The key columns in the file being filtered.", nargs='*') + parser.add_argument("-o", "--output-file", dest="output_kgtk_file", help="The KGTK file to write", type=Path, default=None) - parser.add_argument( "--filter-keys", "--right-keys", dest="filter_keys", help="The key columns in the filter-on file.", nargs='*') + # This argument is retained for compatability with earlier versions of this command. + parser.add_argument( "--error-limit", dest="error_limit", + help=h("The maximum number of errors per input fule (default=%(default)s)"), + default=KgtkReaderOptions.ERROR_LIMIT_DEFAULT) - parser.add_argument( "--field-separator", dest="field_separator", help=h("Separator for multifield keys"), default=IfExists.FIELD_SEPARATOR_DEFAULT) + parser.add_argument( "--field-separator", dest="field_separator", + help=h("Separator for multifield keys (default=%(default)s)") + , default=IfExists.FIELD_SEPARATOR_DEFAULT) KgtkReader.add_debug_arguments(parser, expert=_expert) KgtkReaderOptions.add_arguments(parser, mode_options=True, who="input", expert=_expert) @@ -80,8 +87,8 @@ def run(input_kgtk_file: typing.Optional[Path], error_file: typing.TextIO = sys.stdout if errors_to_stdout else sys.stderr # Build the option structures. 
- input_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs, who="input") - filter_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs, who="filter") + input_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs, who="input", fallback=True) + filter_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs, who="filter", fallback=True) value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs) try: diff --git a/kgtk/cli/ifnotexists.py b/kgtk/cli/ifnotexists.py index 964f77dc0..e49a481f7 100644 --- a/kgtk/cli/ifnotexists.py +++ b/kgtk/cli/ifnotexists.py @@ -49,7 +49,14 @@ def h(msg: str)->str: parser.add_argument( "--filter-keys", "--right-keys", dest="filter_keys", help="The key columns in the filter-on file.", nargs='*') - parser.add_argument( "--field-separator", dest="field_separator", help=h("Separator for multifield keys"), default=IfExists.FIELD_SEPARATOR_DEFAULT) + # This argument is retained for compatability with earlier versions of this command. + parser.add_argument( "--error-limit", dest="error_limit", + help=h("The maximum number of errors per input fule (default=%(default)s)"), + default=KgtkReaderOptions.ERROR_LIMIT_DEFAULT) + + parser.add_argument( "--field-separator", dest="field_separator", + help=h("Separator for multifield keys"), + default=IfExists.FIELD_SEPARATOR_DEFAULT) KgtkReader.add_debug_arguments(parser, expert=_expert) KgtkReaderOptions.add_arguments(parser, mode_options=True, who="input", expert=_expert) @@ -79,8 +86,8 @@ def run(input_kgtk_file: typing.Optional[Path], error_file: typing.TextIO = sys.stdout if errors_to_stdout else sys.stderr # Build the option structures. - input_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs, who="input") - filter_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs, who="filter") + input_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs, who="input", fallback=True) + filter_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs, who="filter", fallback=True) value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs) try: diff --git a/kgtk/io/kgtkreader.py b/kgtk/io/kgtkreader.py index 80518a5e8..ed6de11d9 100644 --- a/kgtk/io/kgtkreader.py +++ b/kgtk/io/kgtkreader.py @@ -248,39 +248,50 @@ def from_dict(cls, d: dict, who: str = "", mode: typing.Optional[KgtkReaderMode] = None, + fallback: bool = False, )->'KgtkReaderOptions': prefix: str = "" # The destination name prefix. if len(who) > 0: prefix = who + "_" + # TODO: Figure out how to type check this method. 
+ def lookup(name: str, default): + prefixed_name = prefix + name + if prefixed_name in d: + return d[prefixed_name] + elif fallback and name in d: + return d[name] + else: + return default + reader_mode: KgtkReaderMode - if mode is None: - reader_mode = d.get(prefix + "mode", KgtkReaderMode.AUTO) - else: + if mode is not None: reader_mode = mode + else: + reader_mode = lookup("mode", KgtkReaderMode.AUTO) return cls( - blank_required_field_line_action=d.get(prefix + "blank_required_field_line_action", ValidationAction.EXCLUDE), - column_separator=d.get(prefix + "column_separator", KgtkFormat.COLUMN_SEPARATOR), - comment_line_action=d.get(prefix + "comment_line_action", ValidationAction.EXCLUDE), - compression_type=d.get(prefix + "compression_type", None), - empty_line_action=d.get(prefix + "empty_line_action", ValidationAction.EXCLUDE), - error_limit=d.get(prefix + "error_limit", cls.ERROR_LIMIT_DEFAULT), - fill_short_lines=d.get(prefix + "fill_short_lines", False), - force_column_names=d.get(prefix + "force_column_names", None), - gzip_in_parallel=d.get(prefix + "gzip_in_parallel", False), - gzip_queue_size=d.get(prefix + "gzip_queue_size", KgtkReaderOptions.GZIP_QUEUE_SIZE_DEFAULT), - header_error_action=d.get(prefix + "header_error_action", ValidationAction.EXCLUDE), - invalid_value_action=d.get(prefix + "invalid_value_action", ValidationAction.REPORT), - long_line_action=d.get(prefix + "long_line_action", ValidationAction.EXCLUDE), + blank_required_field_line_action=lookup("blank_required_field_line_action", ValidationAction.EXCLUDE), + column_separator=lookup("column_separator", KgtkFormat.COLUMN_SEPARATOR), + comment_line_action=lookup("comment_line_action", ValidationAction.EXCLUDE), + compression_type=lookup("compression_type", None), + empty_line_action=lookup("empty_line_action", ValidationAction.EXCLUDE), + error_limit=lookup("error_limit", cls.ERROR_LIMIT_DEFAULT), + fill_short_lines=lookup("fill_short_lines", False), + force_column_names=lookup("force_column_names", None), + gzip_in_parallel=lookup("gzip_in_parallel", False), + gzip_queue_size=lookup("gzip_queue_size", KgtkReaderOptions.GZIP_QUEUE_SIZE_DEFAULT), + header_error_action=lookup("header_error_action", ValidationAction.EXCLUDE), + invalid_value_action=lookup("invalid_value_action", ValidationAction.REPORT), + long_line_action=lookup("long_line_action", ValidationAction.EXCLUDE), mode=reader_mode, - repair_and_validate_lines=d.get(prefix + "repair_and_validate_lines", False), - repair_and_validate_values=d.get(prefix + "repair_and_validate_values", False), - short_line_action=d.get(prefix + "short_line_action", ValidationAction.EXCLUDE), - skip_first_record=d.get(prefix + "skip_first_recordb", False), - truncate_long_lines=d.get(prefix + "truncate_long_lines", False), - unsafe_column_name_action=d.get(prefix + "unsafe_column_name_action", ValidationAction.REPORT), - whitespace_line_action=d.get(prefix + "whitespace_line_action", ValidationAction.EXCLUDE), + repair_and_validate_lines=lookup("repair_and_validate_lines", False), + repair_and_validate_values=lookup("repair_and_validate_values", False), + short_line_action=lookup("short_line_action", ValidationAction.EXCLUDE), + skip_first_record=lookup("skip_first_recordb", False), + truncate_long_lines=lookup("truncate_long_lines", False), + unsafe_column_name_action=lookup("unsafe_column_name_action", ValidationAction.REPORT), + whitespace_line_action=lookup("whitespace_line_action", ValidationAction.EXCLUDE), ) @classmethod @@ -289,8 +300,9 @@ def from_args(cls, args: 
Namespace, who: str = "", mode: typing.Optional[KgtkReaderMode] = None, + fallback: bool = False, )->'KgtkReaderOptions': - return cls.from_dict(vars(args), who=who, mode=mode) + return cls.from_dict(vars(args), who=who, mode=mode, fallback=fallback) DEFAULT_KGTK_READER_OPTIONS: KgtkReaderOptions = KgtkReaderOptions() From b5a14ce1e59e7016666371e9f92492fc86b16e0d Mon Sep 17 00:00:00 2001 From: saggu Date: Mon, 11 May 2020 17:53:25 -0700 Subject: [PATCH 149/278] remove print --- kgtk/triple_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py index 2afaff4e9..6eb538add 100644 --- a/kgtk/triple_generator.py +++ b/kgtk/triple_generator.py @@ -276,7 +276,7 @@ def generate_normal_triple( amount = TripleGenerator.clean_number_string(amount) num_type = self.xsd_number_type(amount) - print(amount, num_type) + lower_bound = TripleGenerator.clean_number_string(lower_bound) upper_bound = TripleGenerator.clean_number_string(upper_bound) if unit != None: From b03678c5f3e46e5e970c51277917f112e2078c49 Mon Sep 17 00:00:00 2001 From: Naren Date: Mon, 11 May 2020 18:19:15 -0700 Subject: [PATCH 150/278] improve speed of reachibility command --- kgtk/cli/reachable_nodes.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/kgtk/cli/reachable_nodes.py b/kgtk/cli/reachable_nodes.py index fa988ae5b..5998ab361 100644 --- a/kgtk/cli/reachable_nodes.py +++ b/kgtk/cli/reachable_nodes.py @@ -25,7 +25,7 @@ def add_arguments(parser): parser.add_argument("--subj", action="store", type=int, dest="sub", help='Column in which the subject is given, default 0', default=0) parser.add_argument("--obj", action="store", type=int, dest="obj", help='Column in which the subject is given, default 2', default=2) parser.add_argument("--pred",action="store" ,type=int, dest="pred",help='Column in which predicate is given, default 1',default=1) - parser.add_argument("--props", action="store", type=str, dest="props",help='Properties to consider while finding reachable nodes - comma-separated string',default=None) + parser.add_argument("--props", action="store", type=str, dest="props",help='Properties to consider while finding reachable nodes - comma-separated string,default all properties',default=None) parser.add_argument('--undirected', action='store_true', dest="undirected", help="Option to specify graph as undirected?") @@ -56,8 +56,8 @@ def get_edges_by_edge_prop(g, p, v): label='c'+str(find_pred_position(sub,pred,obj)) header=['node1','label','node2'] root_set=set() - root_list=[] property_list=[] + if (rootfile): tsv_file = open(rootfile) read_tsv = csv.reader(tsv_file, delimiter="\t") @@ -71,23 +71,24 @@ def get_edges_by_edge_prop(g, p, v): if (root): for r in root.split(','): root_set.add(r) - root_list=list(root_set) - property_list = [item for item in props.split(',')] + G = load_graph_from_csv(filename,not(undirected),skip_first=not(header_bool),hashed=True,csv_options={'delimiter': '\t'},ecols=(sub,obj)) name = G.vp["name"] index_list = [] for v in G.vertices(): - if name[v] in root_list: + if name[v] in root_set: index_list.append(v) edge_filter_set = set() - for prop in property_list: - edge_filter_set.update(get_edges_by_edge_prop(G, label,prop)); - - G.clear_edges() - G.add_edge_list(list(edge_filter_set)) + if props: + property_list = [item for item in props.split(',')] + for prop in property_list: + edge_filter_set.update(get_edges_by_edge_prop(G, label,prop)); + G.clear_edges() + 
G.add_edge_list(list(edge_filter_set)) + if output: f=open(output,'w') tsv_writer = csv.writer(f, quoting=csv.QUOTE_NONE,delimiter="\t",escapechar="\n",quotechar='') From f2fff9ce7e7d6a506733e0531feb43d5e1a54ba1 Mon Sep 17 00:00:00 2001 From: greatyyx Date: Mon, 11 May 2020 18:45:37 -0700 Subject: [PATCH 151/278] fix broken pipe while calling sh in filter --- kgtk/cli/filter.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kgtk/cli/filter.py b/kgtk/cli/filter.py index 8eb4282e6..23fc3232f 100644 --- a/kgtk/cli/filter.py +++ b/kgtk/cli/filter.py @@ -62,5 +62,8 @@ def prepare_filter(property, prop_pattern): elif not sys.stdin.isatty(): sh.mlr('--%slite' % datatype, 'filter', filter_str, _in=sys.stdin, _out=sys.stdout, _err=sys.stderr) - except: - raise KGTKException + except sh.SignalException_SIGPIPE: + # handles SIGPIPE, if it raises to upper level, it will cause another error + pass + except Exception as e: + raise KGTKException(e) From fb0351e2276625f4327c18a3cbfbbce10d68dd90 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Mon, 11 May 2020 20:06:54 -0700 Subject: [PATCH 152/278] Add error_file to kgtkjoiner. --- kgtk/join/kgtkjoiner.py | 86 ++++++++++--------- kgtk/join/test/edgejoiner-test1-inner.sh | 5 -- ...1-file1.tsv => kgtkjoiner-test1-file1.tsv} | 0 ...1-file2.tsv => kgtkjoiner-test1-file2.tsv} | 0 kgtk/join/test/kgtkjoiner-test1-inner.sh | 5 ++ 5 files changed, 49 insertions(+), 47 deletions(-) delete mode 100755 kgtk/join/test/edgejoiner-test1-inner.sh rename kgtk/join/test/{edgejoiner-test1-file1.tsv => kgtkjoiner-test1-file1.tsv} (100%) rename kgtk/join/test/{edgejoiner-test1-file2.tsv => kgtkjoiner-test1-file2.tsv} (100%) create mode 100755 kgtk/join/test/kgtkjoiner-test1-inner.sh diff --git a/kgtk/join/kgtkjoiner.py b/kgtk/join/kgtkjoiner.py index 58f597740..0e7c9dcc8 100644 --- a/kgtk/join/kgtkjoiner.py +++ b/kgtk/join/kgtkjoiner.py @@ -55,6 +55,7 @@ class KgtkJoiner(KgtkFormat): # value_options: typing.Optional[KgtkValueOptions] = attr.ib(attr.validators.optional(attr.validators.instance_of(KgtkValueOptions)), default=None) value_options: typing.Optional[KgtkValueOptions] = attr.ib(default=None) + error_file: typing.TextIO = attr.ib(default=sys.stderr) verbose: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) very_verbose: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False) @@ -111,26 +112,26 @@ def build_join_idx_list(self, kr: KgtkReader, who: str, join_columns: typing.Opt col_num: int = 1 if join_columns is not None and len(join_columns) > 0: if self.verbose: - print("Using %s file join columns: %s" % (who, " ".join(join_columns)), flush=True) + print("Using %s file join columns: %s" % (who, " ".join(join_columns)), file=self.error_file, flush=True) join_column:str for join_column in join_columns: if join_column not in kr.column_name_map: raise ValueError("Join column %s not found in in the %s input file" % (join_column, who)) join_idx = kr.column_name_map[join_column] if self.verbose: - print("Join column %d: %s (index %d in the %s input file)" % (col_num, join_column, join_idx, who), flush=True) + print("Join column %d: %s (index %d in the %s input file)" % (col_num, join_column, join_idx, who), file=self.error_file, flush=True) join_idx_list.append(join_idx) return join_idx_list if kr.is_edge_file: join_idx = self.node1_column_idx(kr, who) if self.verbose: - print("Joining on node1 (index %s in the %s input file)" % (join_idx, who), flush=True) + print("Joining on node1 (index %s in 
the %s input file)" % (join_idx, who), file=self.error_file, flush=True) join_idx_list.append(join_idx) elif kr.is_node_file: join_idx = self.id_column_idx(kr, who) if self.verbose: - print("Joining on id (index %s in the %s input file)" % (join_idx, who), flush=True) + print("Joining on id (index %s in the %s input file)" % (join_idx, who), file=self.error_file, flush=True) join_idx_list.append(join_idx) else: raise ValueError("Unknown file type in build_join_idx_list(...)") @@ -141,21 +142,21 @@ def build_join_idx_list(self, kr: KgtkReader, who: str, join_columns: typing.Opt if kr.label_column_idx < 0: raise ValueError("join_on_label may not be used because the %s input file does not have a label column." % who) if self.verbose: - print("Joining on label (index %s in the %s input file)" % (kr.label_column_idx, who), flush=True) + print("Joining on label (index %s in the %s input file)" % (kr.label_column_idx, who), file=self.error_file, flush=True) join_idx_list.append(kr.label_column_idx) if self.join_on_node2: if kr.node2_column_idx < 0: raise ValueError("join_on_node2 may not be used because the %s input file does not have a node2 column." % who) if self.verbose: - print("Joining on node2 (index %s in the %s input file)" % (kr.node2_column_idx, who), flush=True) + print("Joining on node2 (index %s in the %s input file)" % (kr.node2_column_idx, who), file=self.error_file, flush=True) join_idx_list.append(kr.node2_column_idx) return join_idx_list def extract_join_key_set(self, file_path: Path, who: str, join_idx_list: typing.List[int])->typing.Set[str]: if self.verbose: - print("Extracting the join key set from the %s input file: %s" % (who, str(file_path)), flush=True) + print("Extracting the join key set from the %s input file: %s" % (who, str(file_path)), file=self.error_file, flush=True) reader_options: typing.Optional[KgtkReaderOptions] if who == self.LEFT: reader_options = self.left_reader_options @@ -185,36 +186,36 @@ def join_key_sets(self, left_join_idx_list: typing.List[int], right_join_idx_lis join_key_set: typing.Set[str] if self.left_join and self.right_join: if self.verbose: - print("Outer join, no need to compute join keys.", flush=True) + print("Outer join, no need to compute join keys.", file=self.error_file, flush=True) return None elif self.left_join and not self.right_join: if self.verbose: - print("Computing the left join key set", flush=True) + print("Computing the left join key set", file=self.error_file, flush=True) join_key_set = self.extract_join_key_set(self.left_file_path, self.LEFT, left_join_idx_list).copy() if self.verbose: - print("There are %d keys in the left join key set." % len(join_key_set), flush=True) + print("There are %d keys in the left join key set." % len(join_key_set), file=self.error_file, flush=True) return join_key_set elif self.right_join and not self.left_join: if self.verbose: - print("Computing the right join key set", flush=True) + print("Computing the right join key set", file=self.error_file, flush=True) join_key_set = self.extract_join_key_set(self.right_file_path, self.RIGHT, right_join_idx_list).copy() if self.verbose: - print("There are %d keys in the right join key set." % len(join_key_set), flush=True) + print("There are %d keys in the right join key set." 
% len(join_key_set), file=self.error_file, flush=True) return join_key_set else: if self.verbose: - print("Computing the inner join key set", flush=True) + print("Computing the inner join key set", file=self.error_file, flush=True) left_join_key_set: typing.Set[str] = self.extract_join_key_set(self.left_file_path, self.LEFT, left_join_idx_list) if self.verbose: - print("There are %d keys in the left file key set." % len(left_join_key_set), flush=True) + print("There are %d keys in the left file key set." % len(left_join_key_set), file=self.error_file, flush=True) right_join_key_set: typing.Set[str] = self.extract_join_key_set(self.right_file_path, self.RIGHT, right_join_idx_list) if self.verbose: - print("There are %d keys in the right file key set." % len(right_join_key_set), flush=True) + print("There are %d keys in the right file key set." % len(right_join_key_set), file=self.error_file, flush=True) join_key_set = left_join_key_set.intersection(right_join_key_set) if self.verbose: - print("There are %d keys in the inner join key set." % len(join_key_set), flush=True) + print("There are %d keys in the inner join key set." % len(join_key_set), file=self.error_file, flush=True) return join_key_set def merge_columns(self, left_kr: KgtkReader, right_kr: KgtkReader)->typing.Tuple[typing.List[str], typing.List[str]]: @@ -256,34 +257,34 @@ def merge_columns(self, left_kr: KgtkReader, right_kr: KgtkReader)->typing.Tuple def process(self): if self.verbose: - print("Opening the left edge file: %s" % str(self.left_file_path), flush=True) + print("Opening the left edge file: %s" % str(self.left_file_path), file=self.error_file, flush=True) left_kr: KgtkReader = KgtkReader.open(self.left_file_path, options=self.left_reader_options, value_options = self.value_options, - error_limit=self.error_limit) + ) if self.verbose: - print("Opening the right edge file: %s" % str(self.right_file_path), flush=True) + print("Opening the right edge file: %s" % str(self.right_file_path), file=self.error_file, flush=True) right_kr: KgtkReader = KgtkReader.open(self.right_file_path, options=self.right_reader_options, value_options = self.value_options, - error_limit=self.error_limit) + ) if left_kr.is_edge_file and right_kr.is_edge_file: if self.verbose: - print("Both input files are edge files.", flush=True) + print("Both input files are edge files.", file=self.error_file, flush=True) elif left_kr.is_node_file and right_kr.is_node_file: if self.verbose: - print("Both input files are node files.", flush=True) + print("Both input files are node files.", file=self.error_file, flush=True) else: - print("Cannot join edge and node files.", flush=True) + print("Cannot join edge and node files.", file=self.error_file, flush=True) return left_join_idx_list: typing.List[int] = self.build_join_idx_list(left_kr, self.LEFT, self.left_join_columns) right_join_idx_list: typing.List[int] = self.build_join_idx_list(right_kr, self.RIGHT, self.right_join_columns) if len(left_join_idx_list) != len(right_join_idx_list): - print("the left join key has %d components, the right join key has %d columns. Exiting." % (len(left_join_idx_list), len(right_join_idx_list)), flush=True) + print("the left join key has %d components, the right join key has %d columns. Exiting." 
% (len(left_join_idx_list), len(right_join_idx_list)), file=self.error_file, flush=True) left_kr.close() right_kr.close() return @@ -292,19 +293,19 @@ def process(self): joined_key_set: typing.Optional[typing.Set[str]] = self.join_key_sets(left_join_idx_list, right_join_idx_list) if self.verbose: - print("Mapping the column names for the join.", flush=True) + print("Mapping the column names for the join.", file=self.error_file, flush=True) joined_column_names: typing.List[str] right_column_names: typing.List[str] (joined_column_names, right_column_names) = self.merge_columns(left_kr, right_kr) if self.verbose: - print(" left columns: %s" % " ".join(left_kr.column_names), flush=True) - print(" right columns: %s" % " ".join(right_kr.column_names), flush=True) - print("mapped right columns: %s" % " ".join(right_column_names), flush=True) - print(" joined columns: %s" % " ".join(joined_column_names), flush=True) + print(" left columns: %s" % " ".join(left_kr.column_names), file=self.error_file, flush=True) + print(" right columns: %s" % " ".join(right_kr.column_names), file=self.error_file, flush=True) + print("mapped right columns: %s" % " ".join(right_column_names), file=self.error_file, flush=True) + print(" joined columns: %s" % " ".join(joined_column_names), file=self.error_file, flush=True) if self.verbose: - print("Opening the output edge file: %s" % str(self.output_path), flush=True) + print("Opening the output edge file: %s" % str(self.output_path), file=self.error_file, flush=True) ew: KgtkWriter = KgtkWriter.open(joined_column_names, self.output_path, require_all_columns=False, @@ -321,7 +322,7 @@ def process(self): right_data_lines_kept: int = 0 if self.verbose: - print("Processing the left input file: %s" % str(self.left_file_path), flush=True) + print("Processing the left input file: %s" % str(self.left_file_path), file=self.error_file, flush=True) row: typing.list[str] for row in left_kr: left_data_lines_read += 1 @@ -339,7 +340,7 @@ def process(self): ew.flush() if self.verbose: - print("Processing the right input file: %s" % str(self.right_file_path), flush=True) + print("Processing the right input file: %s" % str(self.right_file_path), file=self.error_file, flush=True) right_shuffle_list: typing.List[int] = ew.build_shuffle_list(right_column_names) for row in right_kr: right_data_lines_read += 1 @@ -356,10 +357,10 @@ def process(self): ew.close() if self.verbose: - print("The join is complete", flush=True) - print("%d left input data lines read, %d kept" % (left_data_lines_read, left_data_lines_kept), flush=True) - print("%d right input data lines read, %d kept" % (right_data_lines_read, right_data_lines_kept), flush=True) - print("%d data lines written." % output_data_lines, flush=True) + print("The join is complete", file=self.error_file, flush=True) + print("%d left input data lines read, %d kept" % (left_data_lines_read, left_data_lines_kept), file=self.error_file, flush=True) + print("%d right input data lines read, %d kept" % (right_data_lines_read, right_data_lines_kept), file=self.error_file, flush=True) + print("%d data lines written." 
% output_data_lines, file=self.error_file, flush=True) def main(): """ @@ -385,15 +386,15 @@ def main(): parser.add_argument( "--right-file-join-columns", dest="right_join_columns", help="Right file join columns.", nargs='+') parser.add_argument( "--right-join", dest="right_join", help="Perform a right outer join.", action='store_true') - parser.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true') - parser.add_argument( "--very-verbose", dest="very_verbose", help="Print additional progress messages.", action='store_true') - - KgtkReaderOptions.add_arguments(parser, mode_options=True, who=KgtkJoiner.LEFT) - KgtkReaderOptions.add_arguments(parser, mode_options=True, who=KgtkJoiner.RIGHT) - KgtkValueOptions.add_arguments(parser) + KgtkReader.add_debug_arguments(parser, expert=True) + KgtkReaderOptions.add_arguments(parser, mode_options=True, who=KgtkJoiner.LEFT, expert=True) + KgtkReaderOptions.add_arguments(parser, mode_options=True, who=KgtkJoiner.RIGHT, expert=True) + KgtkValueOptions.add_arguments(parser, expert=True) args = parser.parse_args() + error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr + # Build the option structures. left_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args, who=KgtkJoiner.LEFT) right_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args, who=KgtkJoiner.RIGHT) @@ -413,6 +414,7 @@ def main(): left_reader_options=left_reader_options, right_reader_options=right_reader_options, value_options=value_options, + error_file=error_file, verbose=args.verbose, very_verbose=args.very_verbose) diff --git a/kgtk/join/test/edgejoiner-test1-inner.sh b/kgtk/join/test/edgejoiner-test1-inner.sh deleted file mode 100755 index a4461d54d..000000000 --- a/kgtk/join/test/edgejoiner-test1-inner.sh +++ /dev/null @@ -1,5 +0,0 @@ -#! /bin/sh -python3 kgtk/join/edgejoiner.py \ - kgtk/join/test/edgejoiner-test1-file1.tsv \ - kgtk/join/test/edgejoiner-test1-file2.tsv \ - --output-file kgtk/join/test/edgejoiner-test1-inner-output.tsv diff --git a/kgtk/join/test/edgejoiner-test1-file1.tsv b/kgtk/join/test/kgtkjoiner-test1-file1.tsv similarity index 100% rename from kgtk/join/test/edgejoiner-test1-file1.tsv rename to kgtk/join/test/kgtkjoiner-test1-file1.tsv diff --git a/kgtk/join/test/edgejoiner-test1-file2.tsv b/kgtk/join/test/kgtkjoiner-test1-file2.tsv similarity index 100% rename from kgtk/join/test/edgejoiner-test1-file2.tsv rename to kgtk/join/test/kgtkjoiner-test1-file2.tsv diff --git a/kgtk/join/test/kgtkjoiner-test1-inner.sh b/kgtk/join/test/kgtkjoiner-test1-inner.sh new file mode 100755 index 000000000..4b02bcd7e --- /dev/null +++ b/kgtk/join/test/kgtkjoiner-test1-inner.sh @@ -0,0 +1,5 @@ +#! /bin/sh +python3 kgtk/join/kgtkjoiner.py \ + kgtk/join/test/kgtkjoiner-test1-file1.tsv \ + kgtk/join/test/kgtkjoiner-test1-file2.tsv \ + --output-file kgtk/join/test/kgtkjoiner-test1-inner-output.tsv From 23a134ed142ae7bc68ba93368897549c5434ed53 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Mon, 11 May 2020 20:46:46 -0700 Subject: [PATCH 153/278] Remove obsolete test. 
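
The obsolete test is the edge-file assertion in extract_join_key_set():
now that kgtkjoiner joins node files as well as edge files, insisting that
the join-key input be an edge file was wrong.  The old nodejoiner test data
is renamed to kgtkjoiner-test2-* so the existing scenarios exercise
kgtkjoiner directly (see kgtk/join/test/kgtkjoiner-test2-inner.sh below).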
--- kgtk/join/kgtkjoiner.py | 3 --- kgtk/join/test/edgejoiner-test1-left.sh | 6 ------ kgtk/join/test/kgtkjoiner-test1-left.sh | 6 ++++++ ...odejoiner-test1-file1.tsv => kgtkjoiner-test2-file1.tsv} | 0 ...odejoiner-test1-file2.tsv => kgtkjoiner-test2-file2.tsv} | 0 kgtk/join/test/kgtkjoiner-test2-inner.sh | 5 +++++ kgtk/join/test/nodejoiner-test1-inner.sh | 5 ----- 7 files changed, 11 insertions(+), 14 deletions(-) delete mode 100755 kgtk/join/test/edgejoiner-test1-left.sh create mode 100755 kgtk/join/test/kgtkjoiner-test1-left.sh rename kgtk/join/test/{nodejoiner-test1-file1.tsv => kgtkjoiner-test2-file1.tsv} (100%) rename kgtk/join/test/{nodejoiner-test1-file2.tsv => kgtkjoiner-test2-file2.tsv} (100%) create mode 100755 kgtk/join/test/kgtkjoiner-test2-inner.sh delete mode 100755 kgtk/join/test/nodejoiner-test1-inner.sh diff --git a/kgtk/join/kgtkjoiner.py b/kgtk/join/kgtkjoiner.py index 0e7c9dcc8..1f5aafd1d 100644 --- a/kgtk/join/kgtkjoiner.py +++ b/kgtk/join/kgtkjoiner.py @@ -169,9 +169,6 @@ def extract_join_key_set(self, file_path: Path, who: str, join_idx_list: typing. verbose=self.verbose, very_verbose=self.very_verbose) - if not kr.is_edge_file: - raise ValueError("The %s file is not an edge file" % who) - if len(join_idx_list) == 1: # This uses optimized code: return self.single_column_key_set(kr, join_idx_list[0]) # closes er file diff --git a/kgtk/join/test/edgejoiner-test1-left.sh b/kgtk/join/test/edgejoiner-test1-left.sh deleted file mode 100755 index 97ddc3f6f..000000000 --- a/kgtk/join/test/edgejoiner-test1-left.sh +++ /dev/null @@ -1,6 +0,0 @@ -#! /bin/sh -python3 kgtk/join/edgejoiner.py \ - kgtk/join/test/edgejoiner-test1-file1.tsv \ - kgtk/join/test/edgejoiner-test1-file2.tsv \ - --left-join \ - --output-file kgtk/join/test/edgejoiner-test1-left-output.tsv diff --git a/kgtk/join/test/kgtkjoiner-test1-left.sh b/kgtk/join/test/kgtkjoiner-test1-left.sh new file mode 100755 index 000000000..5db9e4c31 --- /dev/null +++ b/kgtk/join/test/kgtkjoiner-test1-left.sh @@ -0,0 +1,6 @@ +#! /bin/sh +python3 kgtk/join/kgtkjoiner.py \ + kgtk/join/test/kgtkjoiner-test1-file1.tsv \ + kgtk/join/test/kgtkjoiner-test1-file2.tsv \ + --left-join \ + --output-file kgtk/join/test/kgtkjoiner-test1-left-output.tsv diff --git a/kgtk/join/test/nodejoiner-test1-file1.tsv b/kgtk/join/test/kgtkjoiner-test2-file1.tsv similarity index 100% rename from kgtk/join/test/nodejoiner-test1-file1.tsv rename to kgtk/join/test/kgtkjoiner-test2-file1.tsv diff --git a/kgtk/join/test/nodejoiner-test1-file2.tsv b/kgtk/join/test/kgtkjoiner-test2-file2.tsv similarity index 100% rename from kgtk/join/test/nodejoiner-test1-file2.tsv rename to kgtk/join/test/kgtkjoiner-test2-file2.tsv diff --git a/kgtk/join/test/kgtkjoiner-test2-inner.sh b/kgtk/join/test/kgtkjoiner-test2-inner.sh new file mode 100755 index 000000000..299b4b089 --- /dev/null +++ b/kgtk/join/test/kgtkjoiner-test2-inner.sh @@ -0,0 +1,5 @@ +#! /bin/sh +python3 kgtk/join/kgtkjoiner.py \ + kgtk/join/test/kgtkjoiner-test2-file1.tsv \ + kgtk/join/test/kgtkjoiner-test2-file2.tsv \ + --output-file kgtk/join/test/kgtkjoiner-test2-inner-output.tsv diff --git a/kgtk/join/test/nodejoiner-test1-inner.sh b/kgtk/join/test/nodejoiner-test1-inner.sh deleted file mode 100755 index 6827347f2..000000000 --- a/kgtk/join/test/nodejoiner-test1-inner.sh +++ /dev/null @@ -1,5 +0,0 @@ -#! 
/bin/sh -python3 kgtk/join/nodejoiner.py \ - kgtk/join/test/nodejoiner-test1-file1.tsv \ - kgtk/join/test/nodejoiner-test1-file2.tsv \ - --output-file kgtk/join/test/nodejoiner-test1-inner-output.tsv From 1cc18e335356628c90662c71927d2db291f6b7d6 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Mon, 11 May 2020 20:48:07 -0700 Subject: [PATCH 154/278] Updated scripts. --- kgtk/join/test/kgtkjoiner-test2-left.sh | 6 ++++++ kgtk/join/test/nodejoiner-test1-left.sh | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) create mode 100755 kgtk/join/test/kgtkjoiner-test2-left.sh delete mode 100755 kgtk/join/test/nodejoiner-test1-left.sh diff --git a/kgtk/join/test/kgtkjoiner-test2-left.sh b/kgtk/join/test/kgtkjoiner-test2-left.sh new file mode 100755 index 000000000..37ade789e --- /dev/null +++ b/kgtk/join/test/kgtkjoiner-test2-left.sh @@ -0,0 +1,6 @@ +#! /bin/sh +python3 kgtk/join/kgtkjoiner.py \ + kgtk/join/test/kgtkjoiner-test2-file1.tsv \ + kgtk/join/test/kgtkjoiner-test2-file2.tsv \ + --left-join \ + --output-file kgtk/join/test/kgtkjoiner-test2-left-output.tsv diff --git a/kgtk/join/test/nodejoiner-test1-left.sh b/kgtk/join/test/nodejoiner-test1-left.sh deleted file mode 100755 index c5bee612c..000000000 --- a/kgtk/join/test/nodejoiner-test1-left.sh +++ /dev/null @@ -1,6 +0,0 @@ -#! /bin/sh -python3 kgtk/join/nodejoiner.py \ - kgtk/join/test/nodejoiner-test1-file1.tsv \ - kgtk/join/test/nodejoiner-test1-file2.tsv \ - --left-join \ - --output-file kgtk/join/test/nodejoiner-test1-left-output.tsv From dda432753ab59e40b191008f7f51d1d1841dc7c2 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Mon, 11 May 2020 20:50:19 -0700 Subject: [PATCH 155/278] Update the command syntax. --- kgtk/join/test/ifexists-test1-default.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kgtk/join/test/ifexists-test1-default.sh b/kgtk/join/test/ifexists-test1-default.sh index a03a36cc0..3a3da6cd0 100755 --- a/kgtk/join/test/ifexists-test1-default.sh +++ b/kgtk/join/test/ifexists-test1-default.sh @@ -1,5 +1,5 @@ #! /bin/sh python3 kgtk/join/ifexists.py \ kgtk/join/test/ifexists-test1-file1.tsv \ - kgtk/join/test/ifexists-test1-file2.tsv \ + --filter-on kgtk/join/test/ifexists-test1-file2.tsv \ --output-file kgtk/join/test/ifexists-test1-default-output.tsv From 088b938864f4775f6d56077d645474a2e1b3dac4 Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Mon, 11 May 2020 20:58:18 -0700 Subject: [PATCH 156/278] enable users to define properties in the input kgtk file --- kgtk/triple_generator.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py index 1ed5b5414..04db7d733 100644 --- a/kgtk/triple_generator.py +++ b/kgtk/triple_generator.py @@ -43,6 +43,16 @@ def __init__( ): from etk.wikidata.statement import Rank self.ignore = ignore + self.datatype_mapping = { + "item": Item, + "time": TimeValue, + "globe-coordinate": GlobeCoordinate, + "quantity": QuantityValue, + "monolingualtext": MonolingualText, + "string": StringValue, + "external-identifier": ExternalIdentifier, + "url": URLValue + } self.prop_types = self.set_properties(prop_file) self.label_set, self.alias_set, self.description_set = self.set_sets( label_set, alias_set, description_set @@ -68,6 +78,7 @@ def __init__( self.order_map = {} self.use_id = use_id + def _node_2_entity(self, node: str): ''' A node can be Qxxx or Pxxx, return the proper entity. 
@@ -79,23 +90,13 @@ def _node_2_entity(self, node: str): return entity def set_properties(self, prop_file: str): - datatype_mapping = { - "item": Item, - "time": TimeValue, - "globe-coordinate": GlobeCoordinate, - "quantity": QuantityValue, - "monolingualtext": MonolingualText, - "string": StringValue, - "external-identifier": ExternalIdentifier, - "url": URLValue - } with open(prop_file, "r") as fp: props = fp.readlines() prop_types = {} for line in props[1:]: node1, _, node2 = line.split("\t") try: - prop_types[node1] = datatype_mapping[node2.strip()] + prop_types[node1] = self.datatype_mapping[node2.strip()] except: if not self.ignore: raise KGTKException( @@ -195,7 +196,11 @@ def generate_alias_triple(self, node1: str, label: str, node2: str) -> bool: return True def generate_prop_declaration_triple(self, node1: str, label: str, node2: str) -> bool: - prop = WDProperty(node1, self.prop_types[node1]) + # update the known prop_types + if node1 in self.prop_types: + raise KGTKException("Duplicated property definition of {} found!".format(node1)) + self.prop_types[node1] = node2 + prop = WDProperty(node1, self.datatype_mapping[node2]) self.doc.kg.add_subject(prop) return True @@ -420,7 +425,7 @@ def entry_point(self, line_number: int, edge: str): success = self.generate_description_triple(node1, prop, node2) elif prop in self.alias_set: success = self.generate_alias_triple(node1, prop, node2) - elif prop == "type": + elif prop == "data_type": # special edge of prop declaration success = self.generate_prop_declaration_triple( node1, prop, node2) From b83a945cc1f44d6bff461c3a914788a986012a41 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Mon, 11 May 2020 20:59:21 -0700 Subject: [PATCH 157/278] Demonstrate filterint on a quasi-KGTK file without a node1 column. --- kgtk/join/test/ifexists-test2-file1.tsv | 7 +++++++ kgtk/join/test/ifexists-test2-file2.tsv | 4 ++++ kgtk/join/test/ifexists-test2-label-and-node2.sh | 8 ++++++++ 3 files changed, 19 insertions(+) create mode 100644 kgtk/join/test/ifexists-test2-file1.tsv create mode 100644 kgtk/join/test/ifexists-test2-file2.tsv create mode 100755 kgtk/join/test/ifexists-test2-label-and-node2.sh diff --git a/kgtk/join/test/ifexists-test2-file1.tsv b/kgtk/join/test/ifexists-test2-file1.tsv new file mode 100644 index 000000000..0e648d2c3 --- /dev/null +++ b/kgtk/join/test/ifexists-test2-file1.tsv @@ -0,0 +1,7 @@ +node1 label node2 location +john zipcode 12345 home +john zipcode 12346 work +peter zipcode 12040 home +peter zipcode 12041 work +steve zipcode 45601 home +steve zipcode 45602 work diff --git a/kgtk/join/test/ifexists-test2-file2.tsv b/kgtk/join/test/ifexists-test2-file2.tsv new file mode 100644 index 000000000..721f644c0 --- /dev/null +++ b/kgtk/join/test/ifexists-test2-file2.tsv @@ -0,0 +1,4 @@ +label node2 +zipcode 12345 +zipcode 12040 +zipcode 45601 diff --git a/kgtk/join/test/ifexists-test2-label-and-node2.sh b/kgtk/join/test/ifexists-test2-label-and-node2.sh new file mode 100755 index 000000000..d2fea5d62 --- /dev/null +++ b/kgtk/join/test/ifexists-test2-label-and-node2.sh @@ -0,0 +1,8 @@ +#! 
/bin/sh +python3 kgtk/join/ifexists.py \ + kgtk/join/test/ifexists-test2-file1.tsv \ + --input-keys label node2 \ + --filter-on kgtk/join/test/ifexists-test2-file2.tsv \ + --filter-keys label node2 \ + --filter-mode NONE \ + --output-file kgtk/join/test/ifexists-test1-node1-output.tsv From ea67db5e16e7eeac2e3a9d3f2b7e24393e79cc1f Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Mon, 11 May 2020 21:03:17 -0700 Subject: [PATCH 158/278] Demonstrate kgtk ifexists with a quasi-KGTK filter file. --- kgtk/join/test/kgtk-ifexists-test2-label-and-node2.sh | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100755 kgtk/join/test/kgtk-ifexists-test2-label-and-node2.sh diff --git a/kgtk/join/test/kgtk-ifexists-test2-label-and-node2.sh b/kgtk/join/test/kgtk-ifexists-test2-label-and-node2.sh new file mode 100755 index 000000000..5e8c295ce --- /dev/null +++ b/kgtk/join/test/kgtk-ifexists-test2-label-and-node2.sh @@ -0,0 +1,8 @@ +#! /bin/sh +python3 -m kgtk ifexists \ + kgtk/join/test/ifexists-test2-file1.tsv \ + --input-keys label node2 \ + --filter-on kgtk/join/test/ifexists-test2-file2.tsv \ + --filter-keys label node2 \ + --filter-mode NONE \ + --output-file kgtk/join/test/ifexists-test1-node1-output.tsv From f4599401489684b75172d4f6e29a373f49dedc85 Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Mon, 11 May 2020 21:09:14 -0700 Subject: [PATCH 159/278] moved datatype_mapping to class attributes --- kgtk/triple_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py index 04db7d733..e646d6fa2 100644 --- a/kgtk/triple_generator.py +++ b/kgtk/triple_generator.py @@ -84,7 +84,7 @@ def _node_2_entity(self, node: str): A node can be Qxxx or Pxxx, return the proper entity. ''' if node in self.prop_types: - entity = WDProperty(node, self.prop_types[node]) + entity = WDProperty(node, self.datatype_mapping[self.prop_types[node]]) else: entity = WDItem(TripleGenerator.replace_illegal_string(node)) return entity From d3358c4ee052bfcfa7714533ebcd741af6623f21 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Mon, 11 May 2020 21:15:47 -0700 Subject: [PATCH 160/278] Update column mapping to handle node files as well as edge files. --- kgtk/join/kgtkjoiner.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/kgtk/join/kgtkjoiner.py b/kgtk/join/kgtkjoiner.py index 1f5aafd1d..d0d2f0f4c 100644 --- a/kgtk/join/kgtkjoiner.py +++ b/kgtk/join/kgtkjoiner.py @@ -226,14 +226,12 @@ def merge_columns(self, left_kr: KgtkReader, right_kr: KgtkReader)->typing.Tuple idx: int = 0 for column_name in right_kr.column_names: - if idx == right_kr.node1_column_idx: - # The right file is an edge file and this is its node1 column index. - if left_kr.node1_column_idx >= 0: - # The left file has a node1 column. Map to that. - column_name = left_kr.column_names[left_kr.node1_column_idx] - else: - # Apparently we don't have a destination in the left file. Punt. - raise ValueError("Can't map right join column name to the left file #2.") + if idx == right_kr.id_column_idx and left_kr.id_column_idx >= 0: + # Map the id columns to the name used in the left file. 
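+                # (Left-file precedence is the design choice throughout this
+                # mapping: whichever alias the left file uses becomes the
+                # column name in the joined output.)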
+ column_name = left_kr.column_names[left_kr.id_column_idx] + elif idx == right_kr.node1_column_idx and left_kr.node1_column_idx >= 0: + # Map the node1 columns to the name used in the left file, + column_name = left_kr.column_names[left_kr.node1_column_idx] elif idx == right_kr.label_column_idx and left_kr.label_column_idx >= 0: # Map the right file's label column to the left file's label column. column_name = left_kr.column_names[left_kr.label_column_idx] From 9ad07e98c6fce54a32116338066777ecb3f2a6d1 Mon Sep 17 00:00:00 2001 From: saggu Date: Tue, 12 May 2020 11:04:48 -0700 Subject: [PATCH 161/278] update regex pattern --- kgtk/triple_generator.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py index 61133179c..ddc5d051c 100644 --- a/kgtk/triple_generator.py +++ b/kgtk/triple_generator.py @@ -73,13 +73,13 @@ def __init__( self.yyyy_mm_dd_pattern = re.compile( "[12]\d{3}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])") self.yyyy_pattern = re.compile("[12]\d{3}") + # self.quantity_pattern = re.compile("([\+|\-]?[0-9]+\.?[0-9]*)(?:\[([\+|\-]?[0-9]+\.?[0-9]*),([\+|\-]?[0-9]+\.?[0-9]*)\])?([U|Q](?:[0-9]+))?") self.quantity_pattern = re.compile( - "([\+|\-]?[0-9]+\.?[0-9]*)(?:\[([\+|\-]?[0-9]+\.?[0-9]*),([\+|\-]?[0-9]+\.?[0-9]*)\])?([U|Q](?:[0-9]+))?") + "([\+|\-]?[0-9]+\.?[0-9]*[e|E]?[\-]?[0-9]*)(?:\[([\+|\-]?[0-9]+\.?[0-9]*),([\+|\-]?[0-9]+\.?[0-9]*)\])?([U|Q](?:[0-9]+))?") # order map, know the column index of ["node1","property","node2",id] self.order_map = {} self.use_id = use_id - def _node_2_entity(self, node: str): ''' A node can be Qxxx or Pxxx, return the proper entity. @@ -276,12 +276,13 @@ def generate_normal_triple( elif edge_type == QuantityValue: # +70[+60,+80]Q743895 + res = self.quantity_pattern.match(node2).groups() amount, lower_bound, upper_bound, unit = res amount = TripleGenerator.clean_number_string(amount) num_type = self.xsd_number_type(amount) - + lower_bound = TripleGenerator.clean_number_string(lower_bound) upper_bound = TripleGenerator.clean_number_string(upper_bound) if unit != None: From 599ae0badc76926164eecfe349584b6af6124715 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Tue, 12 May 2020 12:38:19 -0700 Subject: [PATCH 162/278] Ensure that all special column indexes are looked up. Share code better between KgtkReader and its subclasses. 
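
After this change, KgtkReader.open() resolves all four special columns
(node1/label/node2/id, including their aliases) in one place and then
dispatches to EdgeReader or NodeReader itself.  A minimal sketch of the
resulting calling convention ("edges.tsv" is a hypothetical input; absent
columns report index -1):

    from pathlib import Path
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderMode

    kr = KgtkReader.open(Path("edges.tsv"), mode=KgtkReaderMode.AUTO)
    print(kr.node1_column_idx, kr.label_column_idx,
          kr.node2_column_idx, kr.id_column_idx)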
--- kgtk/io/edgereader.py | 71 +++++++---------------- kgtk/io/kgtkbase.py | 51 +++++++++-------- kgtk/io/kgtkreader.py | 128 ++++++++++++++++++++---------------------- kgtk/io/kgtkwriter.py | 20 +++---- kgtk/io/nodereader.py | 67 +++++++--------------- 5 files changed, 135 insertions(+), 202 deletions(-) diff --git a/kgtk/io/edgereader.py b/kgtk/io/edgereader.py index c1234ea78..3225c4579 100644 --- a/kgtk/io/edgereader.py +++ b/kgtk/io/edgereader.py @@ -10,7 +10,7 @@ import sys import typing -from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions +from kgtk.io.kgtkreader import KgtkReader, KgtkReaderMode, KgtkReaderOptions from kgtk.utils.closableiter import ClosableIter from kgtk.utils.enumnameaction import EnumNameAction from kgtk.utils.validationaction import ValidationAction @@ -28,56 +28,24 @@ def open_edge_file(cls, verbose: bool = False, very_verbose: bool = False)->"EdgeReader": - # Supply the default reader and value options: - (options, value_options) = cls._default_options(options, value_options) - - source: ClosableIter[str] = cls._openfile(file_path, options=options, error_file=error_file, verbose=verbose) - - # Read the edge file header and split it into column names. - header: str - column_names: typing.List[str] - (header, column_names) = cls._build_column_names(source, options=options, error_file=error_file, verbose=verbose) - - # Check for unsafe column names. - cls.check_column_names(column_names, - header_line=header, - error_action=options.unsafe_column_name_action, - error_file=error_file) - - # Build a map from column name to column index. - column_name_map: typing.Mapping[str, int] = cls.build_column_name_map(column_names, - header_line=header, - error_action=options.header_error_action, - error_file=error_file) - # Get the indices of the required columns. - node1_column_idx: int - node2_column_idx: int - label_column_idx: int - (node1_column_idx, node2_column_idx, label_column_idx) = cls.required_edge_columns(column_name_map, - header_line=header, - error_action=options.header_error_action, - error_file=error_file) - - if verbose: - print("EdgeReader: Reading an edge file. node1=%d label=%d node2=%d" % (node1_column_idx, label_column_idx, node2_column_idx)) - - - return cls(file_path=file_path, - source=source, - column_names=column_names, - column_name_map=column_name_map, - column_count=len(column_names), - node1_column_idx=node1_column_idx, - node2_column_idx=node2_column_idx, - label_column_idx=label_column_idx, - error_file=error_file, - options=options, - value_options=value_options, - is_edge_file=True, - is_node_file=False, - verbose=verbose, - very_verbose=very_verbose, - ) + result: KgtkReader = cls.open(file_path=file_path, + error_file=error_file, + mode=KgtkReaderMode.EDGE, + options=options, + value_options=value_options, + verbose=verbose, + very_verbose=very_verbose) + # This doesn't work because the EdgeReader imported inside KgtkReader + # is a different class than this one! + # + # TODO: Fix this. + # + #if isinstance(result, cls): + # return result + #else: + # # TODO: throw a better exception + # raise ValueError("open_edge_file expected to produce an EdgeReader") + return typing.cast(EdgeReader, result) def _ignore_if_blank_required_fields(self, values: typing.List[str], line: str)->bool: # Ignore line_action with blank node1 fields. 
This code comes after @@ -126,7 +94,6 @@ def main(): error_file=error_file, options=reader_options, value_options=value_options, - column_separator=args.column_separator, verbose=args.verbose, very_verbose=args.very_verbose) line_count: int = 0 diff --git a/kgtk/io/kgtkbase.py b/kgtk/io/kgtkbase.py index 7cbcef44c..afa7093b9 100644 --- a/kgtk/io/kgtkbase.py +++ b/kgtk/io/kgtkbase.py @@ -145,34 +145,39 @@ def build_column_name_map(cls, return column_name_map @classmethod - def required_edge_columns(cls, - column_name_map: typing.Mapping[str, int], - header_line: str, - error_action: ValidationAction, - error_file: typing.TextIO = sys.stderr - )->typing.Tuple[int, int, int]: - # Ensure that the three required columns are present: + def get_special_columns(cls, + column_name_map: typing.Mapping[str, int], + header_line: str, + error_action: ValidationAction, + error_file: typing.TextIO = sys.stderr, + is_edge_file: bool = False, + is_node_file: bool = False, + )->typing.Tuple[int, int, int, int]: + """ + Four predefined column names are special: they may have name aliases, and + they may be required in ede or node files. + + """ + + # These three predefined columns columns are required for edge files: node1_column_idx: int = cls.get_column_idx(cls.NODE1_COLUMN_NAMES, column_name_map, - header_line=header_line, error_action=error_action, error_file=error_file) + header_line=header_line, error_action=error_action, error_file=error_file, + is_optional=not is_edge_file) - node2_column_idx: int = cls.get_column_idx(cls.NODE2_COLUMN_NAMES, column_name_map, - header_line=header_line, error_action=error_action, error_file=error_file) - label_column_idx: int = cls.get_column_idx(cls.LABEL_COLUMN_NAMES, column_name_map, - header_line=header_line, error_action=error_action, error_file=error_file) + header_line=header_line, error_action=error_action, error_file=error_file, + is_optional=not is_edge_file) - return (node1_column_idx, node2_column_idx, label_column_idx) + node2_column_idx: int = cls.get_column_idx(cls.NODE2_COLUMN_NAMES, column_name_map, + header_line=header_line, error_action=error_action, error_file=error_file, + is_optional=not is_edge_file) + + # This predefined column is required for node files: + id_column_idx: int = cls.get_column_idx(cls.ID_COLUMN_NAMES, column_name_map, + header_line=header_line, error_action=error_action, error_file=error_file, + is_optional=not is_node_file) - @classmethod - def required_node_column(cls, - column_name_map: typing.Mapping[str, int], - header_line: str, - error_action: ValidationAction, - error_file: typing.TextIO = sys.stderr - )->int: - # Ensure that the required column is present: - return cls.get_column_idx(cls.ID_COLUMN_NAMES, column_name_map, - header_line=header_line, error_action=error_action, error_file=error_file) + return (node1_column_idx, label_column_idx, node2_column_idx, id_column_idx) @classmethod def additional_edge_columns(cls, column_names: typing.List[str])->typing.List[str]: diff --git a/kgtk/io/kgtkreader.py b/kgtk/io/kgtkreader.py index ed6de11d9..abc5e76fa 100644 --- a/kgtk/io/kgtkreader.py +++ b/kgtk/io/kgtkreader.py @@ -326,10 +326,16 @@ class KgtkReader(KgtkBase, ClosableIter[typing.List[str]]): column_name_map: typing.Mapping[str, int] = attr.ib(validator=attr.validators.deep_mapping(key_validator=attr.validators.instance_of(str), value_validator=attr.validators.instance_of(int))) - # The index of the mandatory columns. -1 means missing: + # The actual mode used. + # + # TODO: fix the validator. 
+ # mode: KgtkReaderMode = attr.ib(validator=attr.validators.instance_of(KgtkReaderMode), default=KgtkReaderMode.NONE) + mode: KgtkReaderMode = attr.ib(default=KgtkReaderMode.NONE) + + # The index of the mandatory/aliased columns. -1 means missing: node1_column_idx: int = attr.ib(validator=attr.validators.instance_of(int), default=-1) # edge file - node2_column_idx: int = attr.ib(validator=attr.validators.instance_of(int), default=-1) # edge file label_column_idx: int = attr.ib(validator=attr.validators.instance_of(int), default=-1) # edge file + node2_column_idx: int = attr.ib(validator=attr.validators.instance_of(int), default=-1) # edge file id_column_idx: int = attr.ib(validator=attr.validators.instance_of(int), default=-1) # node file data_lines_read: int = attr.ib(validator=attr.validators.instance_of(int), default=0) @@ -364,6 +370,7 @@ def _default_options( def open(cls, file_path: typing.Optional[Path], error_file: typing.TextIO = sys.stderr, + mode: typing.Optional[KgtkReaderMode] = None, options: typing.Optional[KgtkReaderOptions] = None, value_options: typing.Optional[KgtkValueOptions] = None, verbose: bool = False, @@ -395,9 +402,11 @@ def open(cls, error_file=error_file) # Should we automatically determine if this is an edge file or a node file? + if mode is None: + mode = options.mode is_edge_file: bool = False is_node_file: bool = False - if options.mode is KgtkReaderMode.AUTO: + if mode is KgtkReaderMode.AUTO: # If we have a node1 (or alias) column, then this must be an edge file. Otherwise, assume it is a node file. node1_idx: int = cls.get_column_idx(cls.NODE1_COLUMN_NAMES, column_name_map, @@ -416,88 +425,71 @@ def open(cls, if verbose: print("node1 column not found, assuming this is a KGTK node file", file=error_file, flush=True) - elif options.mode is KgtkReaderMode.EDGE: + elif mode is KgtkReaderMode.EDGE: is_edge_file = True - elif options.mode is KgtkReaderMode.NODE: + elif mode is KgtkReaderMode.NODE: is_node_file = True - elif options.mode is KgtkReaderMode.NONE: + elif mode is KgtkReaderMode.NONE: pass + # Get the indices of the special columns. + node1_column_idx: int + label_column_idx: int + node2_column_idx: int + id_column_idx: int + (node1_column_idx, + label_column_idx, + node2_column_idx, + id_column_idx) = cls.get_special_columns(column_name_map, + header_line=header, + error_action=options.header_error_action, + error_file=error_file, + is_edge_file=is_edge_file, + is_node_file=is_node_file) + + if verbose: + print("KgtkReader: Special columns: node1=%d label=%d node2=%d id=%d" % (node1_column_idx, + label_column_idx, + node2_column_idx, + id_column_idx), file=error_file, flush=True) if is_edge_file: # We'll instantiate an EdgeReader, which is a subclass of KgtkReader. # The EdgeReader import is deferred to avoid circular imports. from kgtk.io.edgereader import EdgeReader - # Get the indices of the required columns. - node1_column_idx: int - node2_column_idx: int - label_column_idx: int - (node1_column_idx, node2_column_idx, label_column_idx) = cls.required_edge_columns(column_name_map, - header_line=header, - error_action=options.header_error_action, - error_file=error_file) - if verbose: - print("KgtkReader: Reading an edge file. 
node1=%d label=%d node2=%d" % (node1_column_idx, label_column_idx, node2_column_idx), file=error_file, flush=True) - - return EdgeReader(file_path=file_path, - source=source, - column_names=column_names, - column_name_map=column_name_map, - column_count=len(column_names), - node1_column_idx=node1_column_idx, - node2_column_idx=node2_column_idx, - label_column_idx=label_column_idx, - error_file=error_file, - options=options, - value_options=value_options, - is_edge_file=is_edge_file, - is_node_file=is_node_file, - verbose=verbose, - very_verbose=very_verbose) + print("KgtkReader: Reading an edge file.", file=error_file, flush=True) + + cls = EdgeReader elif is_node_file: # We'll instantiate an NodeReader, which is a subclass of KgtkReader. # The NodeReader import is deferred to avoid circular imports. from kgtk.io.nodereader import NodeReader - # Get the index of the required column: - id_column_idx: int = cls.required_node_column(column_name_map, - header_line=header, - error_action=options.header_error_action, - error_file=error_file) - if verbose: - print("KgtkReader: Reading an node file. id=%d" % (id_column_idx), file=error_file, flush=True) - - return NodeReader(file_path=file_path, - source=source, - column_names=column_names, - column_name_map=column_name_map, - column_count=len(column_names), - id_column_idx=id_column_idx, - error_file=error_file, - options=options, - value_options=value_options, - is_edge_file=is_edge_file, - is_node_file=is_node_file, - verbose=verbose, - very_verbose=very_verbose, - ) - else: - return cls(file_path=file_path, - source=source, - column_names=column_names, - column_name_map=column_name_map, - column_count=len(column_names), - error_file=error_file, - options=options, - value_options=value_options, - is_edge_file=is_edge_file, - is_node_file=is_node_file, - verbose=verbose, - very_verbose=very_verbose, - ) + print("KgtkReader: Reading an node file.", file=error_file, flush=True) + + cls = NodeReader + + return cls(file_path=file_path, + source=source, + column_names=column_names, + column_name_map=column_name_map, + column_count=len(column_names), + mode=mode, + node1_column_idx=node1_column_idx, + label_column_idx=label_column_idx, + node2_column_idx=node2_column_idx, + id_column_idx=id_column_idx, + error_file=error_file, + options=options, + value_options=value_options, + is_edge_file=is_edge_file, + is_node_file=is_node_file, + verbose=verbose, + very_verbose=very_verbose, + ) @classmethod def _open_compressed_file(cls, diff --git a/kgtk/io/kgtkwriter.py b/kgtk/io/kgtkwriter.py index d3d256c07..f740e4c55 100644 --- a/kgtk/io/kgtkwriter.py +++ b/kgtk/io/kgtkwriter.py @@ -202,18 +202,14 @@ def _setup(cls, elif mode is KgtkWriter.Mode.NONE: pass - if is_edge_file: - # Validate that we have the proper columns for an edge file. - cls.required_edge_columns(column_name_map, - header_line=header, - error_action=header_error_action, - error_file=error_file) - elif is_node_file: - # Validate that we have the proper columns for an node file. - cls.required_node_column(column_name_map, - header_line=header, - error_action=header_error_action, - error_file=error_file) + # Validate that we have the proper columns for an edge or node file, + # ignoring the result. + cls.get_special_columns(column_name_map, + header_line=header, + error_action=header_error_action, + error_file=error_file, + is_edge_file=is_edge_file, + is_node_file=is_node_file) # Write the column names to the first line. 
if verbose: diff --git a/kgtk/io/nodereader.py b/kgtk/io/nodereader.py index 4d73ca3a6..bf74beb85 100644 --- a/kgtk/io/nodereader.py +++ b/kgtk/io/nodereader.py @@ -10,7 +10,7 @@ import sys import typing -from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions +from kgtk.io.kgtkreader import KgtkReader, KgtkReaderMode, KgtkReaderOptions from kgtk.utils.closableiter import ClosableIter from kgtk.utils.enumnameaction import EnumNameAction from kgtk.utils.validationaction import ValidationAction @@ -28,50 +28,24 @@ def open_node_file(cls, verbose: bool = False, very_verbose: bool = False)->"NodeReader": - # Supply the default reader and value options: - (options, value_options) = cls._default_options(options, value_options) - - source: ClosableIter[str] = cls._openfile(file_path, options=options, error_file=error_file, verbose=verbose) - - # Read the edge file header and split it into column names. - header: str - column_names: typing.List[str] - (header, column_names) = cls._build_column_names(source, options=options, error_file=error_file, verbose=verbose) - - # Check for unsafe column names. - cls.check_column_names(column_names, - header_line=header, - error_action=options.unsafe_column_name_action, - error_file=error_file) - - # Build a map from column name to column index. - column_name_map: typing.Mapping[str, int] = cls.build_column_name_map(column_names, - header_line=header, - error_action=options.header_error_action, - error_file=error_file) - # Get the index of the required column. - id_column_idx: int = cls.required_node_column(column_name_map, - header_line=header, - error_action=options.header_error_action, - error_file=error_file) - - if verbose: - print("NodeReader: Reading an node file. id=%d" % (id_column_idx)) - - return cls(file_path=file_path, - source=source, - column_names=column_names, - column_name_map=column_name_map, - column_count=len(column_names), - id_column_idx=id_column_idx, - error_file=error_file, - options=options, - value_options=value_options, - is_edge_file=False, - is_node_file=True, - verbose=verbose, - very_verbose=very_verbose, - ) + result: KgtkReader = cls.open(file_path=file_path, + error_file=error_file, + mode=KgtkReaderMode.NODE, + options=options, + value_options=value_options, + verbose=verbose, + very_verbose=very_verbose) + # This doesn't work because the EdgeReader imported inside KgtkReader + # is a different class than this one! + # + # TODO: Fix this. + # + #if isinstance(result, cls): + # return result + #else: + # # TODO: throw a better exception + # raise ValueError("open_node_file expected to produce a NodeReader") + return typing.cast(NodeReader, result) def _ignore_if_blank_required_fields(self, values: typing.List[str], line: str)->bool: # Ignore line_action with blank id fields. This code comes after @@ -105,11 +79,10 @@ def main(): reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args, mode=KgtkReaderMode.NODE) value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) - nr: NodeReader = NodeReader.open_edge_file(args.kgtk_file, + nr: NodeReader = NodeReader.open_node_file(args.kgtk_file, error_file=error_file, options=reader_options, value_options=value_options, - column_separator=args.column_separator, verbose=args.verbose, very_verbose=args.very_verbose) line_count: int = 0 From fd78a76c3409d44608854a6c27ba49e91456e994 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Tue, 12 May 2020 12:40:52 -0700 Subject: [PATCH 163/278] Use the proper output file. 
--- kgtk/join/test/ifexists-test2-label-and-node2.sh | 2 +- kgtk/join/test/kgtk-ifexists-test2-label-and-node2.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kgtk/join/test/ifexists-test2-label-and-node2.sh b/kgtk/join/test/ifexists-test2-label-and-node2.sh index d2fea5d62..7f4cb0850 100755 --- a/kgtk/join/test/ifexists-test2-label-and-node2.sh +++ b/kgtk/join/test/ifexists-test2-label-and-node2.sh @@ -5,4 +5,4 @@ python3 kgtk/join/ifexists.py \ --filter-on kgtk/join/test/ifexists-test2-file2.tsv \ --filter-keys label node2 \ --filter-mode NONE \ - --output-file kgtk/join/test/ifexists-test1-node1-output.tsv + --output-file kgtk/join/test/ifexists-test2-label-and-node2-output.tsv diff --git a/kgtk/join/test/kgtk-ifexists-test2-label-and-node2.sh b/kgtk/join/test/kgtk-ifexists-test2-label-and-node2.sh index 5e8c295ce..2a98c2b7f 100755 --- a/kgtk/join/test/kgtk-ifexists-test2-label-and-node2.sh +++ b/kgtk/join/test/kgtk-ifexists-test2-label-and-node2.sh @@ -5,4 +5,4 @@ python3 -m kgtk ifexists \ --filter-on kgtk/join/test/ifexists-test2-file2.tsv \ --filter-keys label node2 \ --filter-mode NONE \ - --output-file kgtk/join/test/ifexists-test1-node1-output.tsv + --output-file kgtk/join/test/ifexists-test2-label-and-node2-output.tsv From 6e969b301d469fad50326c870d86e9ef16abc621 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Tue, 12 May 2020 13:01:28 -0700 Subject: [PATCH 164/278] Pass error_file and verbose flags in open. --- kgtk/join/kgtkjoiner.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kgtk/join/kgtkjoiner.py b/kgtk/join/kgtkjoiner.py index d0d2f0f4c..f51e0eba7 100644 --- a/kgtk/join/kgtkjoiner.py +++ b/kgtk/join/kgtkjoiner.py @@ -166,6 +166,7 @@ def extract_join_key_set(self, file_path: Path, who: str, join_idx_list: typing. kr: KgtkReader = KgtkReader.open(file_path, options=reader_options, value_options = self.value_options, + error_file=self.error_file, verbose=self.verbose, very_verbose=self.very_verbose) @@ -256,6 +257,9 @@ def process(self): left_kr: KgtkReader = KgtkReader.open(self.left_file_path, options=self.left_reader_options, value_options = self.value_options, + error_file=self.error_file, + verbose=self.verbose, + very_verbose=self.very_verbose ) @@ -264,6 +268,9 @@ def process(self): right_kr: KgtkReader = KgtkReader.open(self.right_file_path, options=self.right_reader_options, value_options = self.value_options, + error_file=self.error_file, + verbose=self.verbose, + very_verbose=self.very_verbose ) if left_kr.is_edge_file and right_kr.is_edge_file: From 2ae42e79f3deb0b3f8b7c97f9268064aa81a8231 Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Tue, 12 May 2020 14:46:23 -0700 Subject: [PATCH 165/278] fix a bug when property is defined in prop_file --- kgtk/triple_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py index 61133179c..51aeb52bf 100644 --- a/kgtk/triple_generator.py +++ b/kgtk/triple_generator.py @@ -85,7 +85,7 @@ def _node_2_entity(self, node: str): A node can be Qxxx or Pxxx, return the proper entity. 
''' if node in self.prop_types: - entity = WDProperty(node, self.datatype_mapping[self.prop_types[node]]) + entity = WDProperty(node, self.prop_types[node]) else: entity = WDItem(TripleGenerator.replace_illegal_string(node)) return entity From b21d9e39048831ffded5cb19ae0edc25a79fd923 Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Tue, 12 May 2020 15:07:41 -0700 Subject: [PATCH 166/278] current workaround of handling url --- kgtk/triple_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py index 9c1bda570..3aa85ffa9 100644 --- a/kgtk/triple_generator.py +++ b/kgtk/triple_generator.py @@ -52,7 +52,7 @@ def __init__( "monolingualtext": MonolingualText, "string": StringValue, "external-identifier": ExternalIdentifier, - "url": URLValue + "url": StringValue } self.prop_types = self.set_properties(prop_file) self.label_set, self.alias_set, self.description_set = self.set_sets( From 3d88fa4cde1902c31034f9b0438c5e055a0f5887 Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Tue, 12 May 2020 16:14:06 -0700 Subject: [PATCH 167/278] fixing the bug when there is trailing tab, the edge_list drops empty list members --- kgtk/triple_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py index 9c1bda570..9ee2c0b64 100644 --- a/kgtk/triple_generator.py +++ b/kgtk/triple_generator.py @@ -374,7 +374,7 @@ def entry_point(self, line_number: int, edge: str): Call corresponding downstream functions """ - edge_list = edge.strip().split("\t") + edge_list = edge.strip("\n").split("\t") l = len(edge_list) if line_number == 1: # initialize the order_map From f0d546018e290b28464ea393d28cfdd8efcb273a Mon Sep 17 00:00:00 2001 From: ckxz105 Date: Tue, 12 May 2020 16:45:56 -0700 Subject: [PATCH 168/278] fix bug if meet empty node value for text embedding --- kgtk/gt/embedding_utils.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kgtk/gt/embedding_utils.py b/kgtk/gt/embedding_utils.py index 8a4b2b833..a4bfda51a 100644 --- a/kgtk/gt/embedding_utils.py +++ b/kgtk/gt/embedding_utils.py @@ -430,10 +430,15 @@ def read_input(self, file_path: str, target_properties: dict, property_labels_di if "@" in node_value and node_value[0] != "@": node_value = node_value[:node_value.index("@")] + # in case we meet an empty value, skip it + if node_value == "": + self._logger.warning("""Skip line "{}" because of empty value.""".format(each_line)) + continue + # remove extra double quote " and single quote ' - while node_value[0] == '"' and node_value[-1] == '"': + while len(node_value) >= 3 and node_value[0] == '"' and node_value[-1] == '"': node_value = node_value[1:-1] - while node_value[0] == "'" and node_value[-1] == "'": + while len(node_value) >= 3 and node_value[0] == "'" and node_value[-1] == "'": node_value = node_value[1:-1] if current_process_node_id != node_id: From bf2c9a5cd6730ce171e21471b4cae41e71b6ce52 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Tue, 12 May 2020 17:25:45 -0700 Subject: [PATCH 169/278] Fix type case in typing. 
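
The annotation used "typing.list", which does not exist; because local
variable annotations are not evaluated at runtime, the mistake only
surfaces under mypy.  A minimal illustration (hypothetical, standalone):

    import typing

    row: typing.List[str]    # accepted by mypy and at runtime
    # row: typing.list[str]  # flagged by mypy: typing has no "list"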
---
 kgtk/join/kgtkjoiner.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/kgtk/join/kgtkjoiner.py b/kgtk/join/kgtkjoiner.py
index f51e0eba7..c8416e10e 100644
--- a/kgtk/join/kgtkjoiner.py
+++ b/kgtk/join/kgtkjoiner.py
@@ -8,17 +8,13 @@
 from argparse import ArgumentParser
 import attr
-import gzip
 from pathlib import Path
-from multiprocessing import Queue
 import sys
 import typing
 
 from kgtk.kgtkformat import KgtkFormat
 from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
 from kgtk.io.kgtkwriter import KgtkWriter
-from kgtk.utils.enumnameaction import EnumNameAction
-from kgtk.utils.validationaction import ValidationAction
 from kgtk.value.kgtkvalueoptions import KgtkValueOptions
 
 @attr.s(slots=True, frozen=True)
 class KgtkJoiner(KgtkFormat):
@@ -325,7 +321,7 @@ def process(self):
 
         if self.verbose:
             print("Processing the left input file: %s" % str(self.left_file_path), file=self.error_file, flush=True)
-        row: typing.list[str]
+        row: typing.List[str]
         for row in left_kr:
             left_data_lines_read += 1
             if joined_key_set is None:

From 961240ade10e09100d7dfb40338897549c5434ed53 Mon Sep 17 00:00:00 2001
From: Craig Milo Rogers
Date: Tue, 12 May 2020 17:25:55 -0700
Subject: [PATCH 170/278] Initial cat support.

---
 kgtk/join/kgtkcat.py          | 185 ++++++++++++++++++++++++++++++++++
 kgtk/join/kgtkmergecolumns.py |  86 ++++++++++++++++
 2 files changed, 271 insertions(+)
 create mode 100644 kgtk/join/kgtkcat.py
 create mode 100644 kgtk/join/kgtkmergecolumns.py

diff --git a/kgtk/join/kgtkcat.py b/kgtk/join/kgtkcat.py
new file mode 100644
index 000000000..5c2c6bf56
--- /dev/null
+++ b/kgtk/join/kgtkcat.py
@@ -0,0 +1,185 @@
+"""
+Cat multiple KGTK files together.
+
+"""
+
+from argparse import ArgumentParser
+import attr
+from pathlib import Path
+import sys
+import typing
+
+from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
+from kgtk.io.kgtkwriter import KgtkWriter
+from kgtk.join.kgtkmergecolumns import KgtkMergeColumns
+from kgtk.value.kgtkvalueoptions import KgtkValueOptions
+
+@attr.s(slots=True, frozen=True)
+class KgtkCat():
+    input_file_paths: typing.List[Path] = attr.ib()
+    output_path: typing.Optional[Path] = attr.ib(validator=attr.validators.optional(attr.validators.instance_of(Path)))
+
+    # TODO: find working validators:
+    reader_options: typing.Optional[KgtkReaderOptions] = attr.ib(default=None)
+    # value_options: typing.Optional[KgtkValueOptions] = attr.ib(attr.validators.optional(attr.validators.instance_of(KgtkValueOptions)), default=None)
+    value_options: typing.Optional[KgtkValueOptions] = attr.ib(default=None)
+
+    error_file: typing.TextIO = attr.ib(default=sys.stderr)
+    verbose: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False)
+    very_verbose: bool = attr.ib(validator=attr.validators.instance_of(bool), default=False)
+
+    def process(self):
+        kmc: KgtkMergeColumns = KgtkMergeColumns()
+
+        # Is the output file an edge file, a node file, or unknown?
+        is_edge_file: bool = False
+        is_node_file: bool = False
+
+        krs: typing.List[KgtkReader] = [ ]
+        kr: KgtkReader
+        idx: int
+
+        if self.verbose:
+            print("Opening the %d input files." % len(self.input_file_paths), file=self.error_file, flush=True)
% len(self.input_file_paths), file=self.error_file, flush=True) + + saw_stdin: bool = False + input_file_path: Path + for idx, input_file_path in enumerate(self.input_file_paths): + if str(input_file_path) == "-": + if saw_stdin: + raise ValueError("Duplicate standard input file %d" % idx + 1) + else: + saw_stdin = False + if self.verbose: + print("Opening file %d: standard input" % idx + 1, file=self.error_file, flush=True) + else: + if self.verbose: + print("Opening file %d: %s" % (idx + 1, str(input_file_path)), file=self.error_file, flush=True) + + kr = KgtkReader.open(input_file_path, + options=self.reader_options, + value_options = self.value_options, + error_file=self.error_file, + verbose=self.verbose, + very_verbose=self.very_verbose, + ) + krs.append(kr) + + # Unless directed otherwise, do not merge edge files with node + # files. If options.mode == KgtkReaderMode.NONE, then neither + # kr.is_edge_file nor kr.is_node_file will be set and the + # consistency check will be skipped. + if kr.is_edge_file: + if is_node_file: + raise ValueError("Cannot merge an edge file to a node file: %s" % input_file_path) + if is_edge_file == False and self.verbose: + print("The output file will be an edge file.") + is_edge_file = True + elif kr.is_node_file: + if is_edge_file: + raise ValueError("Cannot merge a node file to an edge file: %s" % input_file_path) + if is_node_file == False and self.verbose: + print("The output file will be an node file.") + is_node_file = True + + if self.verbose or self.very_verbose: + print("Mapping the %d column names in %s." % (len(kr.column_names), input_file_path), file=self.error_file, flush=True) + if self.very_verbose: + print(" ".join(kr.column_names)) + new_column_names: typing.List[str] = kmc.merge(kr) + if self.very_verbose: + print(" ".join(new_column_names)) + + if self.verbose or self.very_verbose: + print("There are %d merged columns." % len(kmc.column_names)) + if self.very_verbose: + print(" ".join(self.column_names)) + + output_mode: KgtkWriter.Mode = KgtkWriter.Mode.NONE + if is_edge_file: + output_mode = KgtkWriter.Mode.EDGE + if self.verbose: + print("Opening the output edge file: %s" % str(self.output_path), file=self.error_file, flush=True) + elif is_node_file: + output_mode = KgtkWriter.Mode.NODE + if self.verbose: + print("Opening the output node file: %s" % str(self.output_path), file=self.error_file, flush=True) + else: + if self.verbose: + print("Opening the output file: %s" % str(self.output_path), file=self.error_file, flush=True) + + ew: KgtkWriter = KgtkWriter.open(kmc.column_names, + self.output_path, + require_all_columns=False, + prohibit_extra_columns=True, + fill_missing_columns=True, + gzip_in_parallel=False, + mode=output_mode, + verbose=self.verbose, + very_verbose=self.very_verbose) + + output_data_lines: int = 0 + for idx, kr in enumerate(krs): + if kr.file_path is None: + # This shouldn't happen because we constrined all + # input_file_path elements to be not None. However, + # checking here keeps mypy happy. + # + # TODO: throw a better exception. 
+ raise ValueError("Missing file path.") + input_file_path = self.file_path + if self.verbose: + print("Copying data from file %d: %s" % (idx + 1, input_file_path)) + + shuffle_list: typing.List[int] = ew.build_shuffle_list(kmc.new_column_name_lists[idx]) + + input_data_lines: int = 0 + row: typing.List[str] + for row in kr: + input_data_lines += 1 + output_data_lines += 1 + ew.write(row, shuffle_list=shuffle_list) + + # Flush the output file so far: + ew.flush() + + if self.verbose: + print("Read %d data lines from file %d: %s" % (input_data_lines, idx + 1, input_file_path)) + + ew.close() + if self.verbose: + print("Wrote %d lines total from %d files" % (output_data_lines, len(krs))) + +def main(): + """ + Test the KGTK file concatenator. + """ + parser = ArgumentParser() + parser.add_argument(dest="input_file_paths", help="The KGTK files to concatenate", type=Path, nargs='+') + parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to read", type=Path, default=None) + + KgtkReader.add_debug_arguments(parser, expert=True) + KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=True) + KgtkValueOptions.add_arguments(parser, expert=True) + + args = parser.parse_args() + + error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr + + # Build the option structures. + reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args) + value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) + + ec: KgtkCat = KgtkCatr(input_file_paths=args.input_file_paths, + output_path=args.output_file_path, + reader_options=reader_options, + value_options=value_options, + error_file=error_file, + verbose=args.verbose, + very_verbose=args.very_verbose) + + ej.process() + +if __name__ == "__main__": + main() + diff --git a/kgtk/join/kgtkmergecolumns.py b/kgtk/join/kgtkmergecolumns.py new file mode 100644 index 000000000..955e947f6 --- /dev/null +++ b/kgtk/join/kgtkmergecolumns.py @@ -0,0 +1,86 @@ +import attr +import typing + + +from kgtk.io.kgtkreader import KgtkReader + +@attr.s(slots=True, frozen=False) +class KgtkMergeColumns: + """Merge columns from multiple KgtkReaders, respecting predefined column + names with aliases. + + """ + # For attrs 19.1.0 and later: + column_names: typing.List[str] = attr.ib(validator=attr.validators.deep_iterable(member_validator=attr.validators.instance_of(str), + iterable_validator=attr.validators.instance_of(list)), + factory=list) + + # Keep a record of the reserved columns with aliases as we encounter them. + # We will retain the first alias encountered of each group. + id_column_idx: int = attr.ib(validator=attr.validators.instance_of(int), default=-1) + node1_column_idx: int = attr.ib(validator=attr.validators.instance_of(int), default=-1) + label_column_idx: int = attr.ib(validator=attr.validators.instance_of(int), default=-1) + node2_column_idx: int = attr.ib(validator=attr.validators.instance_of(int), default=-1) + + # The column name map is a debugging convenience. It is not required for + # the merge algorithm. + column_name_map: typing.MutableMapping[str, int] = attr.ib(validator=attr.validators.deep_mapping(key_validator=attr.validators.instance_of(str), + value_validator=attr.validators.instance_of(int)), + factory=dict) + + # Maintain a list of the old and new column name lists as a convenience + # for debugging and feedback. 
+ old_column_name_lists: typing.List[typing.List[str]] = attr.ib(factory=list) + new_column_name_lists: typing.List[typing.List[str]] = attr.ib(factory=list) + + def merge(self, kr: KgtkReader): + """ + Add the columns from a KgtkReader into the merged column list, + respecting predefined column names with aliases. + + Return a list of new column names for the KgtkReader, with + predefined names replaced with the name first used in the + joint list of column names. + """ + new_column_names: typing.List[str] = [ ] + + # Record the old column names for debugging. + self.old_column_name_lists.append(kr.column_names) + + column_name: str + idx: int = 0 + for idx, column_name in enumerate(kr.column_names): + if idx == kr.id_column_idx: + if self.id_column_idx >= 0: + column_name = self.column_names[self.id_column_idx] + else: + self.idx_column_idx = len(self.column_names) + + elif idx == kr.node1_column_idx: + if self.node1_column_idx >= 0: + column_name = self.column_names[self.node1_column_idx] + else: + self.node1_column_idx = len(self.column_names) + + elif idx == kr.label_column_idx: + if self.label_column_idx >= 0: + column_name = self.column_names[self.labelcolumn_idx] + else: + self.label_column_idx = len(self.column_names) + + elif idx == kr.node2_column_idx: + if self.node2_column_idx >= 0: + column_name = self.column_names[self.node2_column_idx] + else: + self.node2_column_idx = len(self.column_names) + + new_column_names.append(column_name) + if column_name not in self.column_name_map: + self.column_name_map[column_name] = len(self.column_names) + self.column_names.append(column_name) + + self.new_column_name_lists.append(new_column_names) + return new_column_names + + + From c484a244b0d0f14f241f83e94fbd80caa360f278 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Tue, 12 May 2020 17:26:49 -0700 Subject: [PATCH 171/278] Fix typo. --- kgtk/join/kgtkcat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kgtk/join/kgtkcat.py b/kgtk/join/kgtkcat.py index 5c2c6bf56..282dc6163 100644 --- a/kgtk/join/kgtkcat.py +++ b/kgtk/join/kgtkcat.py @@ -9,7 +9,7 @@ import sys import typing -1>from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions +from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions from kgtk.io.kgtkwriter import KgtkWriter from kgtk.join.kgtkmergecolumns import KgtkMergeColumns from kgtk.value.kgtkvalueoptions import KgtkValueOptions From 5c9eafb50257d23e687bb48ca925550ff0be320b Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Tue, 12 May 2020 17:33:51 -0700 Subject: [PATCH 172/278] Fixed various bugs. 
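
Most of these are attribute-name typos (KgtkCatr for KgtkCat, self.file_path
for kr.file_path, self.labelcolumn_idx for self.label_column_idx). Worth
noting: because these classes are declared with @attr.s(slots=True), a
misspelled attribute assignment raises an immediate AttributeError instead of
silently creating a new attribute. A minimal demo (the Example class below is
made up for illustration, not KGTK code):

    import attr

    @attr.s(slots=True)
    class Example:
        label_column_idx = attr.ib(default=-1)

    e = Example()
    e.label_column_idx = 3     # declared attribute: fine
    try:
        e.labelcolumn_idx = 3  # typo: slots=True rejects the unknown name
    except AttributeError as err:
        print(err)
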
--- kgtk/join/kgtkcat.py | 39 ++++++++++++++++++----------------- kgtk/join/kgtkmergecolumns.py | 2 +- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/kgtk/join/kgtkcat.py b/kgtk/join/kgtkcat.py index 282dc6163..11e7e521b 100644 --- a/kgtk/join/kgtkcat.py +++ b/kgtk/join/kgtkcat.py @@ -73,27 +73,27 @@ def process(self): if is_node_file: raise ValueError("Cannot merge an edge file to a node file: %s" % input_file_path) if is_edge_file == False and self.verbose: - print("The output file will be an edge file.") + print("The output file will be an edge file.", file=self.error_file, flush=True) is_edge_file = True elif kr.is_node_file: if is_edge_file: raise ValueError("Cannot merge a node file to an edge file: %s" % input_file_path) if is_node_file == False and self.verbose: - print("The output file will be an node file.") + print("The output file will be an node file.", file=self.error_file, flush=True) is_node_file = True if self.verbose or self.very_verbose: print("Mapping the %d column names in %s." % (len(kr.column_names), input_file_path), file=self.error_file, flush=True) if self.very_verbose: - print(" ".join(kr.column_names)) + print(" ".join(kr.column_names), file=self.error_file, flush=True) new_column_names: typing.List[str] = kmc.merge(kr) if self.very_verbose: - print(" ".join(new_column_names)) + print(" ".join(new_column_names), file=self.error_file, flush=True) if self.verbose or self.very_verbose: - print("There are %d merged columns." % len(kmc.column_names)) + print("There are %d merged columns." % len(kmc.column_names), file=self.error_file, flush=True) if self.very_verbose: - print(" ".join(self.column_names)) + print(" ".join(self.column_names), file=self.error_file, flush=True) output_mode: KgtkWriter.Mode = KgtkWriter.Mode.NONE if is_edge_file: @@ -127,9 +127,9 @@ def process(self): # # TODO: throw a better exception. 
raise ValueError("Missing file path.") - input_file_path = self.file_path + input_file_path = kr.file_path if self.verbose: - print("Copying data from file %d: %s" % (idx + 1, input_file_path)) + print("Copying data from file %d: %s" % (idx + 1, input_file_path), file=self.error_file, flush=True) shuffle_list: typing.List[int] = ew.build_shuffle_list(kmc.new_column_name_lists[idx]) @@ -146,9 +146,10 @@ def process(self): if self.verbose: print("Read %d data lines from file %d: %s" % (input_data_lines, idx + 1, input_file_path)) - ew.close() if self.verbose: - print("Wrote %d lines total from %d files" % (output_data_lines, len(krs))) + print("Wrote %d lines total from %d files" % (output_data_lines, len(krs)), file=self.error_file, flush=True) + + ew.close() def main(): """ @@ -156,7 +157,7 @@ def main(): """ parser = ArgumentParser() parser.add_argument(dest="input_file_paths", help="The KGTK files to concatenate", type=Path, nargs='+') - parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to read", type=Path, default=None) + parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to read (default=%(default)s)", type=Path, default="-") KgtkReader.add_debug_arguments(parser, expert=True) KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=True) @@ -170,15 +171,15 @@ def main(): reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args) value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) - ec: KgtkCat = KgtkCatr(input_file_paths=args.input_file_paths, - output_path=args.output_file_path, - reader_options=reader_options, - value_options=value_options, - error_file=error_file, - verbose=args.verbose, - very_verbose=args.very_verbose) + ec: KgtkCat = KgtkCat(input_file_paths=args.input_file_paths, + output_path=args.output_file_path, + reader_options=reader_options, + value_options=value_options, + error_file=error_file, + verbose=args.verbose, + very_verbose=args.very_verbose) - ej.process() + ec.process() if __name__ == "__main__": main() diff --git a/kgtk/join/kgtkmergecolumns.py b/kgtk/join/kgtkmergecolumns.py index 955e947f6..a5a75d4b5 100644 --- a/kgtk/join/kgtkmergecolumns.py +++ b/kgtk/join/kgtkmergecolumns.py @@ -64,7 +64,7 @@ def merge(self, kr: KgtkReader): elif idx == kr.label_column_idx: if self.label_column_idx >= 0: - column_name = self.column_names[self.labelcolumn_idx] + column_name = self.column_names[self.label_column_idx] else: self.label_column_idx = len(self.column_names) From 3386ff350aeee797879031a0154fdce16cee133b Mon Sep 17 00:00:00 2001 From: ckxz105 Date: Tue, 12 May 2020 18:17:11 -0700 Subject: [PATCH 173/278] code improvement --- kgtk/gt/embedding_utils.py | 114 ++++++++++++++++++------------------- 1 file changed, 54 insertions(+), 60 deletions(-) diff --git a/kgtk/gt/embedding_utils.py b/kgtk/gt/embedding_utils.py index a4bfda51a..f76c76801 100644 --- a/kgtk/gt/embedding_utils.py +++ b/kgtk/gt/embedding_utils.py @@ -597,35 +597,26 @@ def dump_vectors(self, file_name, type_=None): _ = f.write("\n") def print_vector(self, vectors, output_properties: str = "text_embedding", output_format="kgtk_format"): + self._logger.debug("START printing the vectors") if output_format == "kgtk_format": print("node\tproperty\tvalue\n", end="") - if self.input_format == "kgtk_format": - for i, each_vector in enumerate(vectors): - print(str(list(self.candidates.keys())[i]) + "\t", end="") - print(output_properties + "\t", end="") - for j, each_dimension in 
enumerate(each_vector): - if j != len(each_vector) - 1: - print(str(each_dimension) + ",", end="") - else: - print(str(each_dimension) + "\n", end="") - elif self.input_format == "test_format": - all_nodes = list(self.vectors_map.keys()) - for i, each_vector in enumerate(vectors): - print(all_nodes[i] + "\t", end="") - print(output_properties + "\t", end="") - for j, each_dimension in enumerate(each_vector): - if j != len(each_vector) - 1: - print(str(each_dimension) + ",", end="") - else: - print(str(each_dimension) + "\n", end="") + all_nodes = list(self.vectors_map.keys()) + ten_percent_len = math.ceil(len(vectors) / 10) + for i, each_vector in enumerate(vectors): + if i % ten_percent_len == 0: + percent = i / ten_percent_len * 10 + self._logger.debug("Finished {}%".format(percent)) + print("{}\t{}\t".format(all_nodes[i], output_properties), end="") + for each_dimension in each_vector[:-1]: + print(str(each_dimension) + ",", end="") + print(str(each_vector[-1])) elif output_format == "tsv_format": for each_vector in vectors: - for i, each_dimension in enumerate(each_vector): - if i != len(each_vector) - 1: - print(str(each_dimension) + "\t", end="") - else: - print(str(each_dimension) + "\n", end="") + for each_dimension in each_vector[:-1]: + print(str(each_dimension) + "\t", end="") + print(str(each_vector[-1])) + self._logger.debug("END printing the vectors") def plot_result(self, output_properties: dict, input_format="kgtk_format", output_uri: str = "", output_format="kgtk_format", @@ -655,48 +646,51 @@ def plot_result(self, output_properties: dict, input_format="kgtk_format", else: raise KGTKException("Unknown or unsupport dimensional reduction type: {}".format(dimensional_reduction)) - if input_format == "test_format": - gt_indexes = set() - vector_map_keys = list(self.vectors_map.keys()) - for each_node in self.gt_nodes: - gt_indexes.add(vector_map_keys.index(each_node)) - - self.metadata.append("Q_nodes\tType\tLabel\tDescription") - for i, each in enumerate(self.vectors_map.keys()): - label = self.node_labels[each] - description = self.candidates[each]["sentence"] - if i in gt_indexes: - self.metadata.append("{}\tground_truth_node\t{}\t{}".format(each, label, description)) + if output_uri not in {"", "none"}: + if not os.path.exists(output_uri): + raise ValueError("The given metadata output folder does not exist!") + + metadata_output_path = os.path.join(output_uri, self.vector_dump_file.split("/")[-1]) + if input_format == "test_format": + gt_indexes = set() + vector_map_keys = list(self.vectors_map.keys()) + for each_node in self.gt_nodes: + gt_indexes.add(vector_map_keys.index(each_node)) + + self.metadata.append("Q_nodes\tType\tLabel\tDescription") + for i, each in enumerate(self.vectors_map.keys()): + label = self.node_labels[each] + description = self.candidates[each]["sentence"] + if i in gt_indexes: + self.metadata.append("{}\tground_truth_node\t{}\t{}".format(each, label, description)) + else: + self.metadata.append("{}\tcandidates\t{}\t{}".format(each, label, description)) + self.gt_indexes = gt_indexes + + elif input_format == "kgtk_format": + if len(output_properties.get("metadata_properties", [])) == 0: + for k, v in self.candidates.items(): + label = v.get("label_properties", "") + if len(label) > 0 and isinstance(label, list): + label = label[0] + description = v.get("description_properties", "") + if len(description) > 0 and isinstance(description, list): + description = description[0] + self.metadata.append("{}\t\t{}\t{}".format(k, label, description)) else: - 
self.metadata.append("{}\tcandidates\t{}\t{}".format(each, label, description)) - self.gt_indexes = gt_indexes + required_properties = output_properties["metadata_properties"] + self.metadata.append("node\t" + "\t".join(required_properties)) + for k, v in self.candidates.items(): + each_metadata = k + "\t" + for each in required_properties: + each_metadata += v.get(each, " ") + "\t" + self.metadata.append(each_metadata) + self.dump_vectors(metadata_output_path, "metadata") - elif input_format == "kgtk_format": - if len(output_properties.get("metadata_properties", [])) == 0: - for k, v in self.candidates.items(): - label = v.get("label_properties", "") - if len(label) > 0 and isinstance(label, list): - label = label[0] - description = v.get("description_properties", "") - if len(description) > 0 and isinstance(description, list): - description = description[0] - self.metadata.append("{}\t\t{}\t{}".format(k, label, description)) - else: - required_properties = output_properties["metadata_properties"] - self.metadata.append("node\t" + "\t".join(required_properties)) - for k, v in self.candidates.items(): - each_metadata = k + "\t" - for each in required_properties: - each_metadata += v.get(each, " ") + "\t" - self.metadata.append(each_metadata) - - metadata_output_path = os.path.join(output_uri, self.vector_dump_file.split("/")[-1]) if self.vectors_2D is not None: self.print_vector(self.vectors_2D, output_properties.get("output_properties"), output_format) else: self.print_vector(vectors, output_properties.get("output_properties"), output_format) - if output_uri != "none": - self.dump_vectors(metadata_output_path, "metadata") def evaluate_result(self): """ From 8c1660e85ef0ac73f7fe507194f5106a5a49dd39 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Tue, 12 May 2020 21:25:02 -0700 Subject: [PATCH 174/278] Decouple KgtkMergeColumns from KgtkReader. --- kgtk/join/kgtkcat.py | 2 +- kgtk/join/kgtkjoiner.py | 38 +++++------------------------------ kgtk/join/kgtkmergecolumns.py | 31 +++++++++++++++------------- 3 files changed, 23 insertions(+), 48 deletions(-) diff --git a/kgtk/join/kgtkcat.py b/kgtk/join/kgtkcat.py index 11e7e521b..537d54601 100644 --- a/kgtk/join/kgtkcat.py +++ b/kgtk/join/kgtkcat.py @@ -86,7 +86,7 @@ def process(self): print("Mapping the %d column names in %s." % (len(kr.column_names), input_file_path), file=self.error_file, flush=True) if self.very_verbose: print(" ".join(kr.column_names), file=self.error_file, flush=True) - new_column_names: typing.List[str] = kmc.merge(kr) + new_column_names: typing.List[str] = kmc.merge(kr.column_names) if self.very_verbose: print(" ".join(new_column_names), file=self.error_file, flush=True) diff --git a/kgtk/join/kgtkjoiner.py b/kgtk/join/kgtkjoiner.py index c8416e10e..c949fdfa2 100644 --- a/kgtk/join/kgtkjoiner.py +++ b/kgtk/join/kgtkjoiner.py @@ -15,6 +15,7 @@ from kgtk.kgtkformat import KgtkFormat from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions from kgtk.io.kgtkwriter import KgtkWriter +from kgtk.join.kgtkmergecolumns import KgtkMergeColumns from kgtk.value.kgtkvalueoptions import KgtkValueOptions @attr.s(slots=True, frozen=True) @@ -213,39 +214,10 @@ def join_key_sets(self, left_join_idx_list: typing.List[int], right_join_idx_lis return join_key_set def merge_columns(self, left_kr: KgtkReader, right_kr: KgtkReader)->typing.Tuple[typing.List[str], typing.List[str]]: - joined_column_names: typing.List[str] = [ ] - right_column_names: typing.List[str] = [ ] - - # First step: copy the left column names. 
- column_name: str - for column_name in left_kr.column_names: - joined_column_names.append(column_name) - - idx: int = 0 - for column_name in right_kr.column_names: - if idx == right_kr.id_column_idx and left_kr.id_column_idx >= 0: - # Map the id columns to the name used in the left file. - column_name = left_kr.column_names[left_kr.id_column_idx] - elif idx == right_kr.node1_column_idx and left_kr.node1_column_idx >= 0: - # Map the node1 columns to the name used in the left file, - column_name = left_kr.column_names[left_kr.node1_column_idx] - elif idx == right_kr.label_column_idx and left_kr.label_column_idx >= 0: - # Map the right file's label column to the left file's label column. - column_name = left_kr.column_names[left_kr.label_column_idx] - elif idx == right_kr.node2_column_idx and left_kr.node2_column_idx >= 0: - # Map the right file's node2 column to the left file's node2 column. - column_name = left_kr.column_names[left_kr.node2_column_idx] - else: - # Apply the prefix. - if self.prefix is not None and len(self.prefix) > 0: - column_name = self.prefix + column_name - - right_column_names.append(column_name) - if column_name not in joined_column_names: - joined_column_names.append(column_name) - idx += 1 - - return (joined_column_names, right_column_names) + kmc: KgtkMergeColumns = KgtkMergeColumns() + kmc.merge(left_kr.column_names) + right_column_names: typing.List[str] = kmc.merge(right_kr.column_names, prefix=self.prefix) + return (kmc.column_names, right_column_names) def process(self): if self.verbose: diff --git a/kgtk/join/kgtkmergecolumns.py b/kgtk/join/kgtkmergecolumns.py index a5a75d4b5..d2d4eef5d 100644 --- a/kgtk/join/kgtkmergecolumns.py +++ b/kgtk/join/kgtkmergecolumns.py @@ -2,7 +2,7 @@ import typing -from kgtk.io.kgtkreader import KgtkReader +from kgtk.kgtkformat import KgtkFormat @attr.s(slots=True, frozen=False) class KgtkMergeColumns: @@ -33,46 +33,49 @@ class KgtkMergeColumns: old_column_name_lists: typing.List[typing.List[str]] = attr.ib(factory=list) new_column_name_lists: typing.List[typing.List[str]] = attr.ib(factory=list) - def merge(self, kr: KgtkReader): - """ - Add the columns from a KgtkReader into the merged column list, - respecting predefined column names with aliases. + def merge(self, column_names: typing.List[str], prefix: typing.Optional[str]=None): + """Add column names into the merged column name list, respecting predefined + column names with aliases. + + Return a list of new column names with predefined name aliases replaced with + the name first used in each alias group in the joint list of column names. - Return a list of new column names for the KgtkReader, with - predefined names replaced with the name first used in the - joint list of column names. """ new_column_names: typing.List[str] = [ ] # Record the old column names for debugging. 
- self.old_column_name_lists.append(kr.column_names) + self.old_column_name_lists.append(column_names.copy()) column_name: str idx: int = 0 - for idx, column_name in enumerate(kr.column_names): - if idx == kr.id_column_idx: + for idx, column_name in enumerate(column_names): + if column_name in KgtkFormat.ID_COLUMN_NAMES: if self.id_column_idx >= 0: column_name = self.column_names[self.id_column_idx] else: self.idx_column_idx = len(self.column_names) - elif idx == kr.node1_column_idx: + elif column_name in KgtkFormat.NODE1_COLUMN_NAMES: if self.node1_column_idx >= 0: column_name = self.column_names[self.node1_column_idx] else: self.node1_column_idx = len(self.column_names) - elif idx == kr.label_column_idx: + elif column_name in KgtkFormat.LABEL_COLUMN_NAMES: if self.label_column_idx >= 0: column_name = self.column_names[self.label_column_idx] else: self.label_column_idx = len(self.column_names) - elif idx == kr.node2_column_idx: + elif column_name in KgtkFormat.NODE2_COLUMN_NAMES: if self.node2_column_idx >= 0: column_name = self.column_names[self.node2_column_idx] else: self.node2_column_idx = len(self.column_names) + else: + # Apply the optional prefix. + if prefix is not None and len(prefix) > 0: + column_name = prefix + column_name new_column_names.append(column_name) if column_name not in self.column_name_map: From 374ce947ec703d043097344e6b64c1d9a48077b1 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 13 May 2020 00:23:17 -0700 Subject: [PATCH 175/278] Move merge_columns inline. --- kgtk/join/kgtkjoiner.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/kgtk/join/kgtkjoiner.py b/kgtk/join/kgtkjoiner.py index c949fdfa2..332daf235 100644 --- a/kgtk/join/kgtkjoiner.py +++ b/kgtk/join/kgtkjoiner.py @@ -213,12 +213,6 @@ def join_key_sets(self, left_join_idx_list: typing.List[int], right_join_idx_lis print("There are %d keys in the inner join key set." % len(join_key_set), file=self.error_file, flush=True) return join_key_set - def merge_columns(self, left_kr: KgtkReader, right_kr: KgtkReader)->typing.Tuple[typing.List[str], typing.List[str]]: - kmc: KgtkMergeColumns = KgtkMergeColumns() - kmc.merge(left_kr.column_names) - right_column_names: typing.List[str] = kmc.merge(right_kr.column_names, prefix=self.prefix) - return (kmc.column_names, right_column_names) - def process(self): if self.verbose: print("Opening the left edge file: %s" % str(self.left_file_path), file=self.error_file, flush=True) @@ -264,9 +258,10 @@ def process(self): if self.verbose: print("Mapping the column names for the join.", file=self.error_file, flush=True) - joined_column_names: typing.List[str] - right_column_names: typing.List[str] - (joined_column_names, right_column_names) = self.merge_columns(left_kr, right_kr) + kmc: KgtkMergeColumns = KgtkMergeColumns() + kmc.merge(left_kr.column_names) + right_column_names: typing.List[str] = kmc.merge(right_kr.column_names, prefix=self.prefix) + joined_column_names: typing.List[str] = kmc.column_names if self.verbose: print(" left columns: %s" % " ".join(left_kr.column_names), file=self.error_file, flush=True) From c632deac5a5dbebb283926d0be357cb91f657a68 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 13 May 2020 00:51:50 -0700 Subject: [PATCH 176/278] Add a kgtk join command. Add more defaults. 
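
The new command exposes four join modes through the --left-join and
--right-join flags. Their key-set semantics, as a runnable sketch (the Q-node
keys below are made up for illustration; this is not KGTK code):

    left_keys = {"Q1", "Q2", "Q3"}
    right_keys = {"Q2", "Q3", "Q4"}

    inner = left_keys & right_keys        # neither flag
    left_outer = left_keys                # --left-join: keep every left row
    right_outer = right_keys              # --right-join: keep every right row
    full_outer = left_keys | right_keys   # both flags (equivalent to cat)

    print(sorted(inner), sorted(full_outer))
    # ['Q2', 'Q3'] ['Q1', 'Q2', 'Q3', 'Q4']
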
--- kgtk/cli/join.py | 143 ++++++++++++++++++++++++++++++++++++++++ kgtk/join/ifexists.py | 2 +- kgtk/join/kgtkcat.py | 2 +- kgtk/join/kgtkjoiner.py | 4 +- 4 files changed, 147 insertions(+), 4 deletions(-) create mode 100644 kgtk/cli/join.py diff --git a/kgtk/cli/join.py b/kgtk/cli/join.py new file mode 100644 index 000000000..20c585e24 --- /dev/null +++ b/kgtk/cli/join.py @@ -0,0 +1,143 @@ +""" +Join two KGTK edge files or two KGTK node files. + +TODO: Need KgtkWriterOptions +""" + +from argparse import Namespace, SUPPRESS +from pathlib import Path +import sys +import typing + +from kgtk.cli_argparse import KGTKArgumentParser +from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions +from kgtk.io.kgtkwriter import KgtkWriter +from kgtk.join.kgtkjoiner import KgtkJoiner +from kgtk.value.kgtkvalueoptions import KgtkValueOptions + +def parser(): + return { + 'help': 'Join two KGTK files', + 'description': 'Join two KGTK edge files or two KGTK node files.' + } + + +def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace): + """ + Parse arguments + Args: + parser (argparse.ArgumentParser) + """ + + _expert: bool = parsed_shared_args._expert + + # This helper function makes it easy to suppress options from + # The help message. The options are still there, and initialize + # what they need to initialize. + def h(msg: str)->str: + if _expert: + return msg + else: + return SUPPRESS + + parser.add_argument( "left_file_path", help="The left-side KGTK file to join. Use '-' for stdin (default=%(default)s).", type=Path, default="-") + + parser.add_argument( "right_file_path", help="The right-side KGTK file to join (no default).", type=Path, default="-") + + parser.add_argument( "--join-on-label", dest="join_on_label", + help="If both input files are edge files, include the label column in the join (default=%(default)s).", + action='store_true') + + parser.add_argument( "--join-on-node2", dest="join_on_node2", + help="If both input files are edge files, include the node2 column in the join (default=%(default)s).", + action='store_true') + + parser.add_argument( "--left-file-join-columns", dest="left_join_columns", help="Left file join columns (default=None).", nargs='+') + + parser.add_argument( "--left-join", dest="left_join", help="Perform a left outer join (default=%(default)s).", action='store_true') + + parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to write (default=%(default)s).", type=Path, default="-") + + parser.add_argument( "--prefix", dest="prefix", + help="An optional prefix applied to right file column names in the output file (default=None).") + + parser.add_argument( "--right-file-join-columns", dest="right_join_columns", help="Right file join columns (default=None).", nargs='+') + + parser.add_argument( "--right-join", dest="right_join", help="Perform a right outer join (default=%(default)s).", action='store_true') + + # This argument is retained for compatability with earlier versions of this command. 
+ parser.add_argument( "--error-limit", dest="error_limit", + help=h("The maximum number of errors per input fule (default=%(default)s)"), + default=KgtkReaderOptions.ERROR_LIMIT_DEFAULT) + + parser.add_argument( "--field-separator", dest="field_separator", + help=h("Separator for multifield keys (default=%(default)s)") + , default=KgtkJoiner.FIELD_SEPARATOR_DEFAULT) + + KgtkReader.add_debug_arguments(parser, expert=_expert) + KgtkReaderOptions.add_arguments(parser, mode_options=True, who="left", expert=_expert) + KgtkReaderOptions.add_arguments(parser, mode_options=True, who="right", expert=_expert) + KgtkValueOptions.add_arguments(parser, expert=_expert) + +def run(left_file_path: Path, + right_file_path: Path, + left_join: bool, + right_join: bool, + join_on_label: bool, + join_on_node2: bool, + left_join_columns: typing.Optional[typing.List[str]], + right_join_columns: typing.Optional[typing.List[str]], + output_file_path: Path, + prefix: typing.Optional[str] = None, + + field_separator: str = KgtkJoiner.FIELD_SEPARATOR_DEFAULT, + + errors_to_stdout: bool = False, + errors_to_stderr: bool = True, + verbose: bool = False, + very_verbose: bool = False, + + **kwargs # Whatever KgtkFileOptions and KgtkValueOptions want. +)->int: + # import modules locally + from kgtk.exceptions import KGTKException + + + # Select where to send error messages, defaulting to stderr. + error_file: typing.TextIO = sys.stdout if errors_to_stdout else sys.stderr + + # Build the option structures. + left_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs, who="left", fallback=True) + right_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs, who="right", fallback=True) + value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs) + + try: + kr: KgtkJoiner = KgtkJoiner( + left_file_path=left_file_path, + right_file_path=right_file_path, + output_path=output_file_path, + left_join=left_join, + right_join=right_join, + join_on_label=join_on_label, + join_on_node2=join_on_node2, + left_join_columns=left_join_columns, + right_join_columns=right_join_columns, + prefix=prefix, + field_separator=field_separator, + left_reader_options=left_reader_options, + right_reader_options=right_reader_options, + value_options=value_options, + error_file=error_file, + verbose=verbose, + very_verbose=very_verbose, + ) + + kr.process() + + return 0 + + except SystemExit as e: + raise KGTKException("Exit requested") + except Exception as e: + raise KGTKException(str(e)) + diff --git a/kgtk/join/ifexists.py b/kgtk/join/ifexists.py index 9e5a788c0..26f1f1965 100644 --- a/kgtk/join/ifexists.py +++ b/kgtk/join/ifexists.py @@ -207,7 +207,7 @@ def main(): parser.add_argument( "--filter-on", dest="filter_file_path", help="The KGTK file with the filter data", type=Path, required=True) - parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to read", type=Path, default=None) + parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to write (default=%(default)s).", type=Path, default="-") parser.add_argument( "--field-separator", dest="field_separator", help="Separator for multifield keys", default=IfExists.FIELD_SEPARATOR_DEFAULT) diff --git a/kgtk/join/kgtkcat.py b/kgtk/join/kgtkcat.py index 537d54601..7861185d8 100644 --- a/kgtk/join/kgtkcat.py +++ b/kgtk/join/kgtkcat.py @@ -157,7 +157,7 @@ def main(): """ parser = ArgumentParser() parser.add_argument(dest="input_file_paths", help="The KGTK files to concatenate", type=Path, 
nargs='+') - parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to read (default=%(default)s)", type=Path, default="-") + parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to write (default=%(default)s)", type=Path, default="-") KgtkReader.add_debug_arguments(parser, expert=True) KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=True) diff --git a/kgtk/join/kgtkjoiner.py b/kgtk/join/kgtkjoiner.py index 332daf235..5c2f70139 100644 --- a/kgtk/join/kgtkjoiner.py +++ b/kgtk/join/kgtkjoiner.py @@ -346,8 +346,8 @@ def main(): parser.add_argument( "--left-file-join-columns", dest="left_join_columns", help="Left file join columns.", nargs='+') parser.add_argument( "--left-join", dest="left_join", help="Perform a left outer join.", action='store_true') - parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to read", type=Path, default=None) - parser.add_argument( "--prefix", dest="prefix", help="The prefix applied to right file column names in the output file.") + parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to write", type=Path, default=None) + parser.add_argument( "--prefix", dest="prefix", help="An optional prefix applied to right file column names in the output file (default=None).") parser.add_argument( "--right-file-join-columns", dest="right_join_columns", help="Right file join columns.", nargs='+') parser.add_argument( "--right-join", dest="right_join", help="Perform a right outer join.", action='store_true') From 7cbbcb3c199bd12b95e0f9335e5358f3dc8120f2 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 13 May 2020 01:04:09 -0700 Subject: [PATCH 177/278] Check for forbidden uses of stdin. --- kgtk/cli/join.py | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/kgtk/cli/join.py b/kgtk/cli/join.py index 20c585e24..1c6386e56 100644 --- a/kgtk/cli/join.py +++ b/kgtk/cli/join.py @@ -18,7 +18,7 @@ def parser(): return { 'help': 'Join two KGTK files', - 'description': 'Join two KGTK edge files or two KGTK node files.' + 'description': 'Join two KGTK edge files or two KGTK node files. ' } @@ -40,9 +40,9 @@ def h(msg: str)->str: else: return SUPPRESS - parser.add_argument( "left_file_path", help="The left-side KGTK file to join. Use '-' for stdin (default=%(default)s).", type=Path, default="-") + parser.add_argument( "left_file_path", help="The left-side KGTK file to join (no default).", type=Path, default=None) - parser.add_argument( "right_file_path", help="The right-side KGTK file to join (no default).", type=Path, default="-") + parser.add_argument( "right_file_path", help="The right-side KGTK file to join (no default).", type=Path, default=None) parser.add_argument( "--join-on-label", dest="join_on_label", help="If both input files are edge files, include the label column in the join (default=%(default)s).", @@ -79,8 +79,8 @@ def h(msg: str)->str: KgtkReaderOptions.add_arguments(parser, mode_options=True, who="right", expert=_expert) KgtkValueOptions.add_arguments(parser, expert=_expert) -def run(left_file_path: Path, - right_file_path: Path, +def run(left_file_path: typing.Optional[Path], + right_file_path: typing.Optional[Path], left_join: bool, right_join: bool, join_on_label: bool, @@ -106,6 +106,27 @@ def run(left_file_path: Path, # Select where to send error messages, defaulting to stderr. 
error_file: typing.TextIO = sys.stdout if errors_to_stdout else sys.stderr + if not right_join: + if left_file_path is None or str(left_file_path) == "-": + print("The left file may not be stdin when an inner join or left join is requested.", file=error_file, flush=True) + return 1 + + if not left_join: + if right_file_path is None or str(right_file_path) == "-": + print("The right file may not be stdin when an inner join or right join is requested.", file=error_file, flush=True) + return 1 + + if (left_file_path is None or str(left_file_path) == "-") and (right_file_path is None or str(right_file_path) == "-"): + print("The left and right files may not both be stdin.", file=error_file, flush=True) + return 1 + + if left_file_path is None: + left_file_path = Path("-") + + if right_file_path is None: + right_file_path = Path("-") + + # Build the option structures. left_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs, who="left", fallback=True) right_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs, who="right", fallback=True) From fd39326c4dc1877d04b6b9d794aaeef17a12a75c Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 13 May 2020 01:05:24 -0700 Subject: [PATCH 178/278] Improve the description. --- kgtk/cli/join.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kgtk/cli/join.py b/kgtk/cli/join.py index 1c6386e56..10e754c58 100644 --- a/kgtk/cli/join.py +++ b/kgtk/cli/join.py @@ -18,7 +18,7 @@ def parser(): return { 'help': 'Join two KGTK files', - 'description': 'Join two KGTK edge files or two KGTK node files. ' + 'description': 'Join two KGTK edge files or two KGTK node files. Two passes may be needed, stdin may be forbidden.' } From 0613b4d3c6996fb32d4ddb79c0a6880869587120 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 13 May 2020 01:14:26 -0700 Subject: [PATCH 179/278] Add a kgtk cat command. --- kgtk/cli/cat.py | 92 ++++++++++++++++++++++++++++++++++++++++++++ kgtk/join/kgtkcat.py | 4 +- 2 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 kgtk/cli/cat.py diff --git a/kgtk/cli/cat.py b/kgtk/cli/cat.py new file mode 100644 index 000000000..e911c98c6 --- /dev/null +++ b/kgtk/cli/cat.py @@ -0,0 +1,92 @@ +""" +Concatenate KGTK files. + +TODO: Need KgtkWriterOptions +""" + +from argparse import Namespace, SUPPRESS +from pathlib import Path +import sys +import typing + +from kgtk.cli_argparse import KGTKArgumentParser +from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions +from kgtk.io.kgtkwriter import KgtkWriter +from kgtk.join.kgtkcat import KgtkCat +from kgtk.value.kgtkvalueoptions import KgtkValueOptions + +def parser(): + return { + 'help': 'Join two KGTK files', + 'description': 'Join two KGTK edge files or two KGTK node files. Two passes may be needed, stdin may be forbidden.' + } + + +def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace): + """ + Parse arguments + Args: + parser (argparse.ArgumentParser) + """ + + _expert: bool = parsed_shared_args._expert + + # This helper function makes it easy to suppress options from + # The help message. The options are still there, and initialize + # what they need to initialize. 
+ def h(msg: str)->str: + if _expert: + return msg + else: + return SUPPRESS + + parser.add_argument( "input_file_paths", help="The KGTK files to concatenate.", type=Path, nargs='+', default=[Path("-")]) + + parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to write (default=%(default)s).", type=Path, default="-") + + KgtkReader.add_debug_arguments(parser, expert=_expert) + KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert) + KgtkValueOptions.add_arguments(parser, expert=_expert) + +def run(input_file_paths: typing.List[Path], + output_file_path: Path, + + errors_to_stdout: bool = False, + errors_to_stderr: bool = True, + verbose: bool = False, + very_verbose: bool = False, + + **kwargs # Whatever KgtkFileOptions and KgtkValueOptions want. +)->int: + # import modules locally + from kgtk.exceptions import KGTKException + + + # Select where to send error messages, defaulting to stderr. + error_file: typing.TextIO = sys.stdout if errors_to_stdout else sys.stderr + + # TODO: check that at most one input file is stdin? + + # Build the option structures. + reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs) + value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs) + + try: + kc: KgtkCat = KgtkCat(input_file_paths=input_file_paths, + output_path=output_file_path, + reader_options=reader_options, + value_options=value_options, + error_file=error_file, + verbose=verbose, + very_verbose=very_verbose + ) + + kc.process() + + return 0 + + except SystemExit as e: + raise KGTKException("Exit requested") + except Exception as e: + raise KGTKException(str(e)) + diff --git a/kgtk/join/kgtkcat.py b/kgtk/join/kgtkcat.py index 7861185d8..cb0d6ba8d 100644 --- a/kgtk/join/kgtkcat.py +++ b/kgtk/join/kgtkcat.py @@ -171,7 +171,7 @@ def main(): reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args) value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) - ec: KgtkCat = KgtkCat(input_file_paths=args.input_file_paths, + kc: KgtkCat = KgtkCat(input_file_paths=args.input_file_paths, output_path=args.output_file_path, reader_options=reader_options, value_options=value_options, @@ -179,7 +179,7 @@ def main(): verbose=args.verbose, very_verbose=args.very_verbose) - ec.process() + kc.process() if __name__ == "__main__": main() From e36f94624cbc28aeccfee67447312d6139024504 Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Wed, 13 May 2020 08:27:39 -0700 Subject: [PATCH 180/278] no change of - to _ in qnode --- kgtk/triple_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py index a948908ee..c79789d3b 100644 --- a/kgtk/triple_generator.py +++ b/kgtk/triple_generator.py @@ -21,7 +21,7 @@ ) from etk.knowledge_graph.node import LiteralType -BAD_CHARS = [":", "-", "&", ",", " ", +BAD_CHARS = [":", "&", ",", " ", "(", ")", "\'", '\"', "/", "\\", "[", "]", ";", "|"] From 14d4e3813913deef48aff711f637226da90d7ccb Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 13 May 2020 10:31:36 -0700 Subject: [PATCH 181/278] Refactor slightly and close input files on an error return. 
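
The early returns added here must close both input readers first, since the
method no longer reaches the common cleanup at the end. A self-contained
sketch of the same guarantee expressed with try/finally (illustrative only,
not KGTK code; ok_to_join is a stand-in for the edge/node compatibility
check):

    import io

    def ok_to_join(left, right) -> bool:
        return True   # stand-in for the real compatibility check

    def process(left, right) -> int:
        try:
            if not ok_to_join(left, right):
                return 1   # error return: finally still closes both inputs
            return 0
        finally:
            left.close()
            right.close()

    print(process(io.StringIO("left"), io.StringIO("right")))   # prints 0
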
--- kgtk/join/kgtkjoiner.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/kgtk/join/kgtkjoiner.py b/kgtk/join/kgtkjoiner.py index 5c2f70139..a1662ae0e 100644 --- a/kgtk/join/kgtkjoiner.py +++ b/kgtk/join/kgtkjoiner.py @@ -213,6 +213,21 @@ def join_key_sets(self, left_join_idx_list: typing.List[int], right_join_idx_lis print("There are %d keys in the inner join key set." % len(join_key_set), file=self.error_file, flush=True) return join_key_set + def ok_to_join(self, left_kr: KgtkReader, right_kr: KgtkReader)->bool: + if left_kr.is_edge_file and right_kr.is_edge_file: + if self.verbose: + print("Both input files are edge files.", file=self.error_file, flush=True) + return True + + elif left_kr.is_node_file and right_kr.is_node_file: + if self.verbose: + print("Both input files are node files.", file=self.error_file, flush=True) + return True + + else: + print("Cannot join edge and node files.", file=self.error_file, flush=True) + return False + def process(self): if self.verbose: print("Opening the left edge file: %s" % str(self.left_file_path), file=self.error_file, flush=True) @@ -235,15 +250,10 @@ def process(self): very_verbose=self.very_verbose ) - if left_kr.is_edge_file and right_kr.is_edge_file: - if self.verbose: - print("Both input files are edge files.", file=self.error_file, flush=True) - elif left_kr.is_node_file and right_kr.is_node_file: - if self.verbose: - print("Both input files are node files.", file=self.error_file, flush=True) - else: - print("Cannot join edge and node files.", file=self.error_file, flush=True) - return + if not self.ok_to_join(left_kr, right_kr): + left_kr.close() + right_kr.close() + return 1 left_join_idx_list: typing.List[int] = self.build_join_idx_list(left_kr, self.LEFT, self.left_join_columns) right_join_idx_list: typing.List[int] = self.build_join_idx_list(right_kr, self.RIGHT, self.right_join_columns) @@ -251,7 +261,7 @@ def process(self): print("the left join key has %d components, the right join key has %d columns. Exiting." % (len(left_join_idx_list), len(right_join_idx_list)), file=self.error_file, flush=True) left_kr.close() right_kr.close() - return + return 1 # This might open the input files for a second time. This won't work with stdin. joined_key_set: typing.Optional[typing.Set[str]] = self.join_key_sets(left_join_idx_list, right_join_idx_list) From 19c06ec43fe5ac329f430a5987998db884bf1234 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 13 May 2020 10:48:20 -0700 Subject: [PATCH 182/278] Add file processing arguments that apply to both files. Fix the suppression of the --errors-to- arguments. --- kgtk/cli/join.py | 8 +++----- kgtk/io/kgtkreader.py | 38 +++++++++++++++++++++++++------------- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/kgtk/cli/join.py b/kgtk/cli/join.py index 10e754c58..09044526a 100644 --- a/kgtk/cli/join.py +++ b/kgtk/cli/join.py @@ -65,16 +65,14 @@ def h(msg: str)->str: parser.add_argument( "--right-join", dest="right_join", help="Perform a right outer join (default=%(default)s).", action='store_true') - # This argument is retained for compatability with earlier versions of this command. 
- parser.add_argument( "--error-limit", dest="error_limit", - help=h("The maximum number of errors per input fule (default=%(default)s)"), - default=KgtkReaderOptions.ERROR_LIMIT_DEFAULT) - parser.add_argument( "--field-separator", dest="field_separator", help=h("Separator for multifield keys (default=%(default)s)") , default=KgtkJoiner.FIELD_SEPARATOR_DEFAULT) + # Build the command arguments. File arguments can be set for individual + # files, or for all files. KgtkReader.add_debug_arguments(parser, expert=_expert) + KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert) KgtkReaderOptions.add_arguments(parser, mode_options=True, who="left", expert=_expert) KgtkReaderOptions.add_arguments(parser, mode_options=True, who="right", expert=_expert) KgtkValueOptions.add_arguments(parser, expert=_expert) diff --git a/kgtk/io/kgtkreader.py b/kgtk/io/kgtkreader.py index abc5e76fa..23f8e6c0c 100644 --- a/kgtk/io/kgtkreader.py +++ b/kgtk/io/kgtkreader.py @@ -953,19 +953,31 @@ def h(msg: str)->str: else: return SUPPRESS - # TODO: Fix the argparse bug that prevents these two arguments from - # having their help messages suppressed. - errors_to = parser.add_mutually_exclusive_group() - errors_to.add_argument( "--errors-to-stdout", dest="errors_to_stdout", - help="Send errors to stdout instead of stderr", - action="store_true") - errors_to.add_argument( "--errors-to-stderr", dest="errors_to_stderr", - help="Send errors to stderr instead of stdout", - action="store_true") - - parser.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true') - - parser.add_argument( "--very-verbose", dest="very_verbose", + egroup: _ArgumentGroup = parser.add_argument_group(h("Error and feedback messages"), + h("Send error messages and feedback to stderr or stdout, " + + "control the amount of feedback and debugging messages.")) + + # Avoid the argparse bug that prevents these two arguments from having + # their help messages suppressed directly. + if expert: + errors_to = egroup.add_mutually_exclusive_group() + errors_to.add_argument( "--errors-to-stdout", dest="errors_to_stdout", + help="Send errors to stdout instead of stderr", + action="store_true") + errors_to.add_argument( "--errors-to-stderr", dest="errors_to_stderr", + help="Send errors to stderr instead of stdout", + action="store_true") + else: + egroup.add_argument( "--errors-to-stdout", dest="errors_to_stdout", + help=h("Send errors to stdout instead of stderr"), + action="store_true") + egroup.add_argument( "--errors-to-stderr", dest="errors_to_stderr", + help=h("Send errors to stderr instead of stdout"), + action="store_true") + + egroup.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true') + + egroup.add_argument( "--very-verbose", dest="very_verbose", help=h("Print additional progress messages."), action='store_true') From 92813786ed3b6b91613e1096be675244b866d2cb Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 13 May 2020 10:59:41 -0700 Subject: [PATCH 183/278] Better documentation. --- kgtk/cli/join.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/kgtk/cli/join.py b/kgtk/cli/join.py index 09044526a..f7bf8820a 100644 --- a/kgtk/cli/join.py +++ b/kgtk/cli/join.py @@ -18,7 +18,19 @@ def parser(): return { 'help': 'Join two KGTK files', - 'description': 'Join two KGTK edge files or two KGTK node files. Two passes may be needed, stdin may be forbidden.' 
+ 'description': """Join two KGTK edge files or two KGTK node files. +Join keys are extracted from one or both input files and stored in memory, +then the data is processed in a second pass. +stdin will not work as an input file if two passes are needed. + +The output file contains the union of the columns in the two +input files, adjusted for predefined name aliasing. + +Specify --left-join to get a left outer join. +Specify --right-join to get a right outer join. +Specify both to get a full outer join (equivalent to cat). +Specify neither to get an inner join. +""" } @@ -40,9 +52,9 @@ def h(msg: str)->str: else: return SUPPRESS - parser.add_argument( "left_file_path", help="The left-side KGTK file to join (no default).", type=Path, default=None) + parser.add_argument( "left_file_path", help="The left-side KGTK file to join (required).", type=Path, default=None) - parser.add_argument( "right_file_path", help="The right-side KGTK file to join (no default).", type=Path, default=None) + parser.add_argument( "right_file_path", help="The right-side KGTK file to join (required).", type=Path, default=None) parser.add_argument( "--join-on-label", dest="join_on_label", help="If both input files are edge files, include the label column in the join (default=%(default)s).", From 7b9783bcb3a10970ad1f0f34dc0a490e6d395ac3 Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 13 May 2020 11:02:33 -0700 Subject: [PATCH 184/278] Mention expert mode. --- kgtk/cli/join.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kgtk/cli/join.py b/kgtk/cli/join.py index f7bf8820a..7e88b5449 100644 --- a/kgtk/cli/join.py +++ b/kgtk/cli/join.py @@ -30,6 +30,8 @@ def parser(): Specify --right-join to get a right outer join. Specify both to get a full outer join (equivalent to cat). Specify neither to get an inner join. + +Expert mode provides additional command arguments. 
""" } From 0285933c708f548fb1da562d370f8fae482efd7a Mon Sep 17 00:00:00 2001 From: saggu Date: Wed, 13 May 2020 12:02:00 -0700 Subject: [PATCH 185/278] add unit testsf or triple generation --- kgtk/tests/data/P10.tsv | 22 + kgtk/tests/data/P10_not_truthy.ttl | 154 + kgtk/tests/data/P10_truthy.ttl | 169 + kgtk/tests/data/Q57160439.tsv | 40 + kgtk/tests/data/Q57160439_not_truthy.ttl | 200 + kgtk/tests/data/Q57160439_truthy.ttl | 224 + kgtk/tests/data/wikidata_properties.tsv | 7440 ++++++++++++++++++++++ kgtk/tests/test_triple_generation.py | 97 + 8 files changed, 8346 insertions(+) create mode 100644 kgtk/tests/data/P10.tsv create mode 100644 kgtk/tests/data/P10_not_truthy.ttl create mode 100644 kgtk/tests/data/P10_truthy.ttl create mode 100644 kgtk/tests/data/Q57160439.tsv create mode 100644 kgtk/tests/data/Q57160439_not_truthy.ttl create mode 100644 kgtk/tests/data/Q57160439_truthy.ttl create mode 100644 kgtk/tests/data/wikidata_properties.tsv create mode 100644 kgtk/tests/test_triple_generation.py diff --git a/kgtk/tests/data/P10.tsv b/kgtk/tests/data/P10.tsv new file mode 100644 index 000000000..412cd3a05 --- /dev/null +++ b/kgtk/tests/data/P10.tsv @@ -0,0 +1,22 @@ +node1 property node2 id +P10 P1628 http://schema.org/video P10-P1628-1 +P10 P1628 http://www.w3.org/2006/vcard/ns#Video P10-P1628-2 +P10 P1629 Q34508 P10-P1629-1 +P10 P1659 P1651 P10-P1659-1 +P10 P1659 P51 P10-P1659-2 +P10 P1659 P18 P10-P1659-3 +P10 P1659 P4238 P10-P1659-4 +P10 P1855 Q4504 P10-P1855-1 +P10 P1855 Q69063653 P10-P1855-2 +P10 P1855 Q15075950 P10-P1855-3 +P10 P1855 Q7378 P10-P1855-4 +P10 P2302 Q21510852 P10-P2302-1 +P10 P2302 Q21502404 P10-P2302-2 +P10 P2302 Q21510851 P10-P2302-3 +P10 P31 Q18610173 P10-P31-1 +P10 aliases 'media'@en P10-alias-0 +P10 aliases 'animation'@en P10-alias-1 +P10 aliases 'gif'@en P10-alias-2 +P10 aliases 'trailer (Commons)'@en P10-alias-3 +P10 descriptions "'relevant video. For images, use the property P18. For film trailers, qualify with ""object has role"" (P3831)=""trailer"" (Q622550)'@en" P10-description-0 +P10 label 'video'@en P10-label-1 diff --git a/kgtk/tests/data/P10_not_truthy.ttl b/kgtk/tests/data/P10_not_truthy.ttl new file mode 100644 index 000000000..688da992a --- /dev/null +++ b/kgtk/tests/data/P10_not_truthy.ttl @@ -0,0 +1,154 @@ +@prefix wikibase: . +@prefix wd: . +@prefix wdt: . +@prefix wdtn: . +@prefix wdno: . +@prefix wds: . +@prefix wdv: . +@prefix wdref: . +@prefix p: . +@prefix pr: . +@prefix prv: . +@prefix prn: . +@prefix ps: . +@prefix psv: . +@prefix psn: . +@prefix pq: . +@prefix pqv: . +@prefix pqn: . +@prefix prov: . +@prefix skos: . +@prefix schema: . + +wd:P10 a wikibase:Property ; + rdfs:label "video"@en ; + schema:description "relevant video. For images, use the property P18. 
For film trailers, qualify with object has role (P3831)=trailer (Q622550)"@en ; + schema:name "video"@en ; + wikibase:claim p:P10 ; + wikibase:directClaim wdt:P10 ; + wikibase:directClaimNormalized wdtn:P10 ; + wikibase:novalue wdno:P10 ; + wikibase:propertyType wikibase:String ; + wikibase:qualifier pq:P10 ; + wikibase:qualifierValue pqv:P10 ; + wikibase:qualifierValueNormalized pqn:P10 ; + wikibase:reference pr:P10 ; + wikibase:referenceValue prv:P10 ; + wikibase:referenceValueNormalized prn:P10 ; + wikibase:statementProperty ps:P10 ; + wikibase:statementValue psv:P10 ; + wikibase:statementValueNormalized psn:P10 ; + skos:altLabel "animation"@en, + "gif"@en, + "media"@en, + "trailer (Commons)"@en ; + skos:prefLabel "video"@en ; + p:P1628 wds:P10-P10-P1628-1, + wds:P10-P10-P1628-2 ; + p:P1629 wds:P10-P10-P1629-1 ; + p:P1659 wds:P10-P10-P1659-1, + wds:P10-P10-P1659-2, + wds:P10-P10-P1659-3, + wds:P10-P10-P1659-4 ; + p:P1855 wds:P10-P10-P1855-1, + wds:P10-P10-P1855-2, + wds:P10-P10-P1855-3, + wds:P10-P10-P1855-4 ; + p:P2302 wds:P10-P10-P2302-1, + wds:P10-P10-P2302-2, + wds:P10-P10-P2302-3 ; + p:P31 wds:P10-P10-P31-1 . + +wd:Q15075950 a wikibase:Item . + +wd:Q18610173 a wikibase:Item . + +wd:Q21502404 a wikibase:Item . + +wd:Q21510851 a wikibase:Item . + +wd:Q21510852 a wikibase:Item . + +wd:Q34508 a wikibase:Item . + +wd:Q4504 a wikibase:Item . + +wd:Q69063653 a wikibase:Item . + +wd:Q7378 a wikibase:Item . + +wds:P10-P10-P1628-1 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + ps:P1628 "http://schema.org/video"^^xsd:string . + +wds:P10-P10-P1628-2 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + ps:P1628 "http://www.w3.org/2006/vcard/ns#Video"^^xsd:string . + +wds:P10-P10-P1629-1 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + ps:P1629 wd:Q34508 . + +wds:P10-P10-P1659-1 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + ps:P1659 "P1651"^^xsd:string . + +wds:P10-P10-P1659-2 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + ps:P1659 "P51"^^xsd:string . + +wds:P10-P10-P1659-3 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + ps:P1659 "P18"^^xsd:string . + +wds:P10-P10-P1659-4 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + ps:P1659 "P4238"^^xsd:string . + +wds:P10-P10-P1855-1 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + ps:P1855 wd:Q4504 . + +wds:P10-P10-P1855-2 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + ps:P1855 wd:Q69063653 . + +wds:P10-P10-P1855-3 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + ps:P1855 wd:Q15075950 . + +wds:P10-P10-P1855-4 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + ps:P1855 wd:Q7378 . + +wds:P10-P10-P2302-1 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + ps:P2302 wd:Q21510852 . + +wds:P10-P10-P2302-2 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + ps:P2302 wd:Q21502404 . + +wds:P10-P10-P2302-3 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + ps:P2302 wd:Q21510851 . + +wds:P10-P10-P31-1 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + ps:P31 wd:Q18610173 . + diff --git a/kgtk/tests/data/P10_truthy.ttl b/kgtk/tests/data/P10_truthy.ttl new file mode 100644 index 000000000..eb2a54df1 --- /dev/null +++ b/kgtk/tests/data/P10_truthy.ttl @@ -0,0 +1,169 @@ +@prefix wikibase: . +@prefix wd: . +@prefix wdt: . +@prefix wdtn: . +@prefix wdno: . +@prefix wds: . +@prefix wdv: . +@prefix wdref: . 
+@prefix p: . +@prefix pr: . +@prefix prv: . +@prefix prn: . +@prefix ps: . +@prefix psv: . +@prefix psn: . +@prefix pq: . +@prefix pqv: . +@prefix pqn: . +@prefix prov: . +@prefix skos: . +@prefix schema: . + +wd:P10 a wikibase:Property ; + rdfs:label "video"@en ; + schema:description "relevant video. For images, use the property P18. For film trailers, qualify with object has role (P3831)=trailer (Q622550)"@en ; + schema:name "video"@en ; + wikibase:claim p:P10 ; + wikibase:directClaim wdt:P10 ; + wikibase:directClaimNormalized wdtn:P10 ; + wikibase:novalue wdno:P10 ; + wikibase:propertyType wikibase:String ; + wikibase:qualifier pq:P10 ; + wikibase:qualifierValue pqv:P10 ; + wikibase:qualifierValueNormalized pqn:P10 ; + wikibase:reference pr:P10 ; + wikibase:referenceValue prv:P10 ; + wikibase:referenceValueNormalized prn:P10 ; + wikibase:statementProperty ps:P10 ; + wikibase:statementValue psv:P10 ; + wikibase:statementValueNormalized psn:P10 ; + skos:altLabel "animation"@en, + "gif"@en, + "media"@en, + "trailer (Commons)"@en ; + skos:prefLabel "video"@en ; + p:P1628 wds:P10-P10-P1628-1, + wds:P10-P10-P1628-2 ; + p:P1629 wds:P10-P10-P1629-1 ; + p:P1659 wds:P10-P10-P1659-1, + wds:P10-P10-P1659-2, + wds:P10-P10-P1659-3, + wds:P10-P10-P1659-4 ; + p:P1855 wds:P10-P10-P1855-1, + wds:P10-P10-P1855-2, + wds:P10-P10-P1855-3, + wds:P10-P10-P1855-4 ; + p:P2302 wds:P10-P10-P2302-1, + wds:P10-P10-P2302-2, + wds:P10-P10-P2302-3 ; + p:P31 wds:P10-P10-P31-1 ; + wdt:P1628 "http://schema.org/video"^^xsd:string, + "http://www.w3.org/2006/vcard/ns#Video"^^xsd:string ; + wdt:P1629 wd:Q34508 ; + wdt:P1659 "P1651"^^xsd:string, + "P18"^^xsd:string, + "P4238"^^xsd:string, + "P51"^^xsd:string ; + wdt:P1855 wd:Q15075950, + wd:Q4504, + wd:Q69063653, + wd:Q7378 ; + wdt:P2302 wd:Q21502404, + wd:Q21510851, + wd:Q21510852 ; + wdt:P31 wd:Q18610173 . + +wds:P10-P10-P1628-1 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P1628 "http://schema.org/video"^^xsd:string . + +wds:P10-P10-P1628-2 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P1628 "http://www.w3.org/2006/vcard/ns#Video"^^xsd:string . + +wds:P10-P10-P1629-1 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P1629 wd:Q34508 . + +wds:P10-P10-P1659-1 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P1659 "P1651"^^xsd:string . + +wds:P10-P10-P1659-2 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P1659 "P51"^^xsd:string . + +wds:P10-P10-P1659-3 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P1659 "P18"^^xsd:string . + +wds:P10-P10-P1659-4 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P1659 "P4238"^^xsd:string . + +wds:P10-P10-P1855-1 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P1855 wd:Q4504 . + +wds:P10-P10-P1855-2 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P1855 wd:Q69063653 . + +wds:P10-P10-P1855-3 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P1855 wd:Q15075950 . + +wds:P10-P10-P1855-4 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P1855 wd:Q7378 . + +wds:P10-P10-P2302-1 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P2302 wd:Q21510852 . + +wds:P10-P10-P2302-2 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P2302 wd:Q21502404 . + +wds:P10-P10-P2302-3 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P2302 wd:Q21510851 . 
+
+wds:P10-P10-P31-1 a wikibase:Statement ;
+    wikibase:rank wikibase:BestRank ;
+     ;
+    ps:P31 wd:Q18610173 .
+
+wd:Q15075950 a wikibase:Item .
+
+wd:Q18610173 a wikibase:Item .
+
+wd:Q21502404 a wikibase:Item .
+
+wd:Q21510851 a wikibase:Item .
+
+wd:Q21510852 a wikibase:Item .
+
+wd:Q34508 a wikibase:Item .
+
+wd:Q4504 a wikibase:Item .
+
+wd:Q69063653 a wikibase:Item .
+
+wd:Q7378 a wikibase:Item .
+
diff --git a/kgtk/tests/data/Q57160439.tsv b/kgtk/tests/data/Q57160439.tsv
new file mode 100644
index 000000000..2307da78a
--- /dev/null
+++ b/kgtk/tests/data/Q57160439.tsv
@@ -0,0 +1,40 @@
+node1 property node2 id
+Q57160439 P1433 Q1146531 Q57160439-P1433-1
+Q57160439 P1476 'A quantitative test to estimate neutralizing antibodies to the hepatitis C virus: cytofluorimetric assessment of envelope glycoprotein 2 binding to target cells'@en Q57160439-P1476-1
+Q57160439 P2093 D. Rosa Q57160439-P2093-1
+Q57160439-P2093-1 P1545 1 Q57160439-P2093-1-P1545-1
+Q57160439 P2093 Q. L. Choo Q57160439-P2093-10
+Q57160439-P2093-10 P1545 10 Q57160439-P2093-10-P1545-1
+Q57160439 P2093 D. Chien Q57160439-P2093-11
+Q57160439-P2093-11 P1545 11 Q57160439-P2093-11-P1545-1
+Q57160439 P2093 P. Pileri Q57160439-P2093-12
+Q57160439-P2093-12 P1545 12 Q57160439-P2093-12-P1545-1
+Q57160439 P2093 M. Houghton Q57160439-P2093-13
+Q57160439-P2093-13 P1545 13 Q57160439-P2093-13-P1545-1
+Q57160439 P2093 S. Abrignani Q57160439-P2093-14
+Q57160439-P2093-14 P1545 14 Q57160439-P2093-14-P1545-1
+Q57160439 P2093 S. Campagnoli Q57160439-P2093-2
+Q57160439-P2093-2 P1545 2 Q57160439-P2093-2-P1545-1
+Q57160439 P2093 C. Moretto Q57160439-P2093-3
+Q57160439-P2093-3 P1545 3 Q57160439-P2093-3-P1545-1
+Q57160439 P2093 E. Guenzi Q57160439-P2093-4
+Q57160439-P2093-4 P1545 4 Q57160439-P2093-4-P1545-1
+Q57160439 P2093 L. Cousens Q57160439-P2093-5
+Q57160439-P2093-5 P1545 5 Q57160439-P2093-5-P1545-1
+Q57160439 P2093 M. Chin Q57160439-P2093-6
+Q57160439-P2093-6 P1545 6 Q57160439-P2093-6-P1545-1
+Q57160439 P2093 C. Dong Q57160439-P2093-7
+Q57160439-P2093-7 P1545 7 Q57160439-P2093-7-P1545-1
+Q57160439 P2093 A. J. Weiner Q57160439-P2093-8
+Q57160439-P2093-8 P1545 8 Q57160439-P2093-8-P1545-1
+Q57160439 P2093 J. Y. Lau Q57160439-P2093-9
+Q57160439-P2093-9 P1545 9 Q57160439-P2093-9-P1545-1
+Q57160439 P304 1759-1763 Q57160439-P304-1
+Q57160439 P31 Q13442814 Q57160439-P31-1
+Q57160439 P356 10.1073/PNAS.93.5.1759 Q57160439-P356-1
+Q57160439 P407 Q1860 Q57160439-P407-1
+Q57160439 P433 5 Q57160439-P433-1
+Q57160439 P478 93 Q57160439-P478-1
+Q57160439 P577 ^1996-03-05T00:00:00Z/11 Q57160439-P577-1
+Q57160439 P921 Q79460 Q57160439-P921-1
+Q57160439 label 'A quantitative test to estimate neutralizing antibodies to the hepatitis C virus: cytofluorimetric assessment of envelope glycoprotein 2 binding to target cells'@en Q57160439-label-1
diff --git a/kgtk/tests/data/Q57160439_not_truthy.ttl b/kgtk/tests/data/Q57160439_not_truthy.ttl
new file mode 100644
index 000000000..58ebdbf77
--- /dev/null
+++ b/kgtk/tests/data/Q57160439_not_truthy.ttl
@@ -0,0 +1,200 @@
+@prefix wikibase: <http://wikiba.se/ontology#> .
+@prefix wd: <http://www.wikidata.org/entity/> .
+@prefix wdt: <http://www.wikidata.org/prop/direct/> .
+@prefix wdtn: <http://www.wikidata.org/prop/direct-normalized/> .
+@prefix wdno: <http://www.wikidata.org/prop/novalue/> .
+@prefix wds: <http://www.wikidata.org/entity/statement/> .
+@prefix wdv: <http://www.wikidata.org/value/> .
+@prefix wdref: <http://www.wikidata.org/reference/> .
+@prefix p: <http://www.wikidata.org/prop/> .
+@prefix pr: <http://www.wikidata.org/prop/reference/> .
+@prefix prv: <http://www.wikidata.org/prop/reference/value/> .
+@prefix prn: <http://www.wikidata.org/prop/reference/value-normalized/> .
+@prefix ps: <http://www.wikidata.org/prop/statement/> .
+@prefix psv: <http://www.wikidata.org/prop/statement/value/> .
+@prefix psn: <http://www.wikidata.org/prop/statement/value-normalized/> .
+@prefix pq: <http://www.wikidata.org/prop/qualifier/> .
+@prefix pqv: <http://www.wikidata.org/prop/qualifier/value/> .
+@prefix pqn: <http://www.wikidata.org/prop/qualifier/value-normalized/> .
+@prefix prov: <http://www.w3.org/ns/prov#> .
+@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
+@prefix schema: <http://schema.org/> .
+ +wd:Q57160439 a wikibase:Item ; + rdfs:label "A quantitative test to estimate neutralizing antibodies to the hepatitis C virus: cytofluorimetric assessment of envelope glycoprotein 2 binding to target cells"@en ; + schema:name "A quantitative test to estimate neutralizing antibodies to the hepatitis C virus: cytofluorimetric assessment of envelope glycoprotein 2 binding to target cells"@en ; + skos:prefLabel "A quantitative test to estimate neutralizing antibodies to the hepatitis C virus: cytofluorimetric assessment of envelope glycoprotein 2 binding to target cells"@en ; + p:P1433 wds:Q57160439-Q57160439-P1433-1 ; + p:P1476 wds:Q57160439-Q57160439-P1476-1 ; + p:P2093 wds:Q57160439-Q57160439-P2093-1, + wds:Q57160439-Q57160439-P2093-10, + wds:Q57160439-Q57160439-P2093-11, + wds:Q57160439-Q57160439-P2093-12, + wds:Q57160439-Q57160439-P2093-13, + wds:Q57160439-Q57160439-P2093-14, + wds:Q57160439-Q57160439-P2093-2, + wds:Q57160439-Q57160439-P2093-3, + wds:Q57160439-Q57160439-P2093-4, + wds:Q57160439-Q57160439-P2093-5, + wds:Q57160439-Q57160439-P2093-6, + wds:Q57160439-Q57160439-P2093-7, + wds:Q57160439-Q57160439-P2093-8, + wds:Q57160439-Q57160439-P2093-9 ; + p:P304 wds:Q57160439-Q57160439-P304-1 ; + p:P31 wds:Q57160439-Q57160439-P31-1 ; + p:P356 wds:Q57160439-Q57160439-P356-1 ; + p:P407 wds:Q57160439-Q57160439-P407-1 ; + p:P433 wds:Q57160439-Q57160439-P433-1 ; + p:P478 wds:Q57160439-Q57160439-P478-1 ; + p:P577 wds:Q57160439-Q57160439-P577-1 ; + p:P921 wds:Q57160439-Q57160439-P921-1 . + +wd:Q1146531 a wikibase:Item . + +wd:Q13442814 a wikibase:Item . + +wd:Q1860 a wikibase:Item . + +wd:Q79460 a wikibase:Item . + +wds:Q57160439-Q57160439-P1433-1 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + ps:P1433 wd:Q1146531 . + +wds:Q57160439-Q57160439-P1476-1 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + ps:P1476 "A quantitative test to estimate neutralizing antibodies to the hepatitis C virus: cytofluorimetric assessment of envelope glycoprotein 2 binding to target cells"@en . + +wds:Q57160439-Q57160439-P2093-1 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + pq:P1545 "1"^^xsd:string ; + ps:P2093 "D. Rosa"^^xsd:string . + +wds:Q57160439-Q57160439-P2093-10 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + pq:P1545 "10"^^xsd:string ; + ps:P2093 "Q. L. Choo"^^xsd:string . + +wds:Q57160439-Q57160439-P2093-11 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + pq:P1545 "11"^^xsd:string ; + ps:P2093 "D. Chien"^^xsd:string . + +wds:Q57160439-Q57160439-P2093-12 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + pq:P1545 "12"^^xsd:string ; + ps:P2093 "P. Pileri"^^xsd:string . + +wds:Q57160439-Q57160439-P2093-13 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + pq:P1545 "13"^^xsd:string ; + ps:P2093 "M. Houghton"^^xsd:string . + +wds:Q57160439-Q57160439-P2093-14 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + pq:P1545 "14"^^xsd:string ; + ps:P2093 "S. Abrignani"^^xsd:string . + +wds:Q57160439-Q57160439-P2093-2 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + pq:P1545 "2"^^xsd:string ; + ps:P2093 "S. Campagnoli"^^xsd:string . + +wds:Q57160439-Q57160439-P2093-3 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + pq:P1545 "3"^^xsd:string ; + ps:P2093 "C. Moretto"^^xsd:string . + +wds:Q57160439-Q57160439-P2093-4 a wikibase:Statement ; + wikibase:rank wikibase:NormalRank ; + ; + pq:P1545 "4"^^xsd:string ; + ps:P2093 "E. Guenzi"^^xsd:string . 
+
+wds:Q57160439-Q57160439-P2093-5 a wikibase:Statement ;
+    wikibase:rank wikibase:NormalRank ;
+     ;
+    pq:P1545 "5"^^xsd:string ;
+    ps:P2093 "L. Cousens"^^xsd:string .
+
+wds:Q57160439-Q57160439-P2093-6 a wikibase:Statement ;
+    wikibase:rank wikibase:NormalRank ;
+     ;
+    pq:P1545 "6"^^xsd:string ;
+    ps:P2093 "M. Chin"^^xsd:string .
+
+wds:Q57160439-Q57160439-P2093-7 a wikibase:Statement ;
+    wikibase:rank wikibase:NormalRank ;
+     ;
+    pq:P1545 "7"^^xsd:string ;
+    ps:P2093 "C. Dong"^^xsd:string .
+
+wds:Q57160439-Q57160439-P2093-8 a wikibase:Statement ;
+    wikibase:rank wikibase:NormalRank ;
+     ;
+    pq:P1545 "8"^^xsd:string ;
+    ps:P2093 "A. J. Weiner"^^xsd:string .
+
+wds:Q57160439-Q57160439-P2093-9 a wikibase:Statement ;
+    wikibase:rank wikibase:NormalRank ;
+     ;
+    pq:P1545 "9"^^xsd:string ;
+    ps:P2093 "J. Y. Lau"^^xsd:string .
+
+wds:Q57160439-Q57160439-P304-1 a wikibase:Statement ;
+    wikibase:rank wikibase:NormalRank ;
+     ;
+    ps:P304 "1759-1763"^^xsd:string .
+
+wds:Q57160439-Q57160439-P31-1 a wikibase:Statement ;
+    wikibase:rank wikibase:NormalRank ;
+     ;
+    ps:P31 wd:Q13442814 .
+
+wds:Q57160439-Q57160439-P356-1 a wikibase:Statement ;
+    wikibase:rank wikibase:NormalRank ;
+     ;
+    ps:P356 "10.1073/PNAS.93.5.1759"^^xsd:string .
+
+wds:Q57160439-Q57160439-P407-1 a wikibase:Statement ;
+    wikibase:rank wikibase:NormalRank ;
+     ;
+    ps:P407 wd:Q1860 .
+
+wds:Q57160439-Q57160439-P433-1 a wikibase:Statement ;
+    wikibase:rank wikibase:NormalRank ;
+     ;
+    ps:P433 "5"^^xsd:string .
+
+wds:Q57160439-Q57160439-P478-1 a wikibase:Statement ;
+    wikibase:rank wikibase:NormalRank ;
+     ;
+    ps:P478 "93"^^xsd:string .
+
+wds:Q57160439-Q57160439-P577-1 a wikibase:Statement ;
+    wikibase:rank wikibase:NormalRank ;
+     ;
+    ps:P577 "1996-03-05T00:00:00"^^xsd:dateTime ;
+    psv:P577 wdv:Timec1996-03-05T000000cQc11c0 .
+
+wds:Q57160439-Q57160439-P921-1 a wikibase:Statement ;
+    wikibase:rank wikibase:NormalRank ;
+     ;
+    ps:P921 wd:Q79460 .
+
+wdv:Timec1996-03-05T000000cQc11c0 a wikibase:Time ;
+    wikibase:timeCalendarModel wd:Q1985727 ;
+    wikibase:timePrecision 11 ;
+    wikibase:timeTimezone 0 ;
+    wikibase:timeValue "1996-03-05T00:00:00"^^xsd:dateTime .
+
diff --git a/kgtk/tests/data/Q57160439_truthy.ttl b/kgtk/tests/data/Q57160439_truthy.ttl
new file mode 100644
index 000000000..79d181af1
--- /dev/null
+++ b/kgtk/tests/data/Q57160439_truthy.ttl
@@ -0,0 +1,224 @@
+@prefix wikibase: <http://wikiba.se/ontology#> .
+@prefix wd: <http://www.wikidata.org/entity/> .
+@prefix wdt: <http://www.wikidata.org/prop/direct/> .
+@prefix wdtn: <http://www.wikidata.org/prop/direct-normalized/> .
+@prefix wdno: <http://www.wikidata.org/prop/novalue/> .
+@prefix wds: <http://www.wikidata.org/entity/statement/> .
+@prefix wdv: <http://www.wikidata.org/value/> .
+@prefix wdref: <http://www.wikidata.org/reference/> .
+@prefix p: <http://www.wikidata.org/prop/> .
+@prefix pr: <http://www.wikidata.org/prop/reference/> .
+@prefix prv: <http://www.wikidata.org/prop/reference/value/> .
+@prefix prn: <http://www.wikidata.org/prop/reference/value-normalized/> .
+@prefix ps: <http://www.wikidata.org/prop/statement/> .
+@prefix psv: <http://www.wikidata.org/prop/statement/value/> .
+@prefix psn: <http://www.wikidata.org/prop/statement/value-normalized/> .
+@prefix pq: <http://www.wikidata.org/prop/qualifier/> .
+@prefix pqv: <http://www.wikidata.org/prop/qualifier/value/> .
+@prefix pqn: <http://www.wikidata.org/prop/qualifier/value-normalized/> .
+@prefix prov: <http://www.w3.org/ns/prov#> .
+@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
+@prefix schema: <http://schema.org/> .
+ +wd:Q57160439 a wikibase:Item ; + rdfs:label "A quantitative test to estimate neutralizing antibodies to the hepatitis C virus: cytofluorimetric assessment of envelope glycoprotein 2 binding to target cells"@en ; + schema:name "A quantitative test to estimate neutralizing antibodies to the hepatitis C virus: cytofluorimetric assessment of envelope glycoprotein 2 binding to target cells"@en ; + skos:prefLabel "A quantitative test to estimate neutralizing antibodies to the hepatitis C virus: cytofluorimetric assessment of envelope glycoprotein 2 binding to target cells"@en ; + p:P1433 wds:Q57160439-Q57160439-P1433-1 ; + p:P1476 wds:Q57160439-Q57160439-P1476-1 ; + p:P2093 wds:Q57160439-Q57160439-P2093-1, + wds:Q57160439-Q57160439-P2093-10, + wds:Q57160439-Q57160439-P2093-11, + wds:Q57160439-Q57160439-P2093-12, + wds:Q57160439-Q57160439-P2093-13, + wds:Q57160439-Q57160439-P2093-14, + wds:Q57160439-Q57160439-P2093-2, + wds:Q57160439-Q57160439-P2093-3, + wds:Q57160439-Q57160439-P2093-4, + wds:Q57160439-Q57160439-P2093-5, + wds:Q57160439-Q57160439-P2093-6, + wds:Q57160439-Q57160439-P2093-7, + wds:Q57160439-Q57160439-P2093-8, + wds:Q57160439-Q57160439-P2093-9 ; + p:P304 wds:Q57160439-Q57160439-P304-1 ; + p:P31 wds:Q57160439-Q57160439-P31-1 ; + p:P356 wds:Q57160439-Q57160439-P356-1 ; + p:P407 wds:Q57160439-Q57160439-P407-1 ; + p:P433 wds:Q57160439-Q57160439-P433-1 ; + p:P478 wds:Q57160439-Q57160439-P478-1 ; + p:P577 wds:Q57160439-Q57160439-P577-1 ; + p:P921 wds:Q57160439-Q57160439-P921-1 ; + wdt:P1433 wd:Q1146531 ; + wdt:P1476 "A quantitative test to estimate neutralizing antibodies to the hepatitis C virus: cytofluorimetric assessment of envelope glycoprotein 2 binding to target cells"@en ; + wdt:P2093 "A. J. Weiner"^^xsd:string, + "C. Dong"^^xsd:string, + "C. Moretto"^^xsd:string, + "D. Chien"^^xsd:string, + "D. Rosa"^^xsd:string, + "E. Guenzi"^^xsd:string, + "J. Y. Lau"^^xsd:string, + "L. Cousens"^^xsd:string, + "M. Chin"^^xsd:string, + "M. Houghton"^^xsd:string, + "P. Pileri"^^xsd:string, + "Q. L. Choo"^^xsd:string, + "S. Abrignani"^^xsd:string, + "S. Campagnoli"^^xsd:string ; + wdt:P304 "1759-1763"^^xsd:string ; + wdt:P31 wd:Q13442814 ; + wdt:P356 "10.1073/PNAS.93.5.1759"^^xsd:string ; + wdt:P407 wd:Q1860 ; + wdt:P433 "5"^^xsd:string ; + wdt:P478 "93"^^xsd:string ; + wdt:P577 "1996-03-05T00:00:00"^^xsd:dateTime ; + wdt:P921 wd:Q79460 . + +wds:Q57160439-Q57160439-P1433-1 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P1433 wd:Q1146531 . + +wds:Q57160439-Q57160439-P1476-1 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P1476 "A quantitative test to estimate neutralizing antibodies to the hepatitis C virus: cytofluorimetric assessment of envelope glycoprotein 2 binding to target cells"@en . + +wds:Q57160439-Q57160439-P2093-1 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + pq:P1545 "1"^^xsd:string ; + ps:P2093 "D. Rosa"^^xsd:string . + +wds:Q57160439-Q57160439-P2093-10 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + pq:P1545 "10"^^xsd:string ; + ps:P2093 "Q. L. Choo"^^xsd:string . + +wds:Q57160439-Q57160439-P2093-11 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + pq:P1545 "11"^^xsd:string ; + ps:P2093 "D. Chien"^^xsd:string . + +wds:Q57160439-Q57160439-P2093-12 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + pq:P1545 "12"^^xsd:string ; + ps:P2093 "P. Pileri"^^xsd:string . 
+ +wds:Q57160439-Q57160439-P2093-13 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + pq:P1545 "13"^^xsd:string ; + ps:P2093 "M. Houghton"^^xsd:string . + +wds:Q57160439-Q57160439-P2093-14 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + pq:P1545 "14"^^xsd:string ; + ps:P2093 "S. Abrignani"^^xsd:string . + +wds:Q57160439-Q57160439-P2093-2 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + pq:P1545 "2"^^xsd:string ; + ps:P2093 "S. Campagnoli"^^xsd:string . + +wds:Q57160439-Q57160439-P2093-3 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + pq:P1545 "3"^^xsd:string ; + ps:P2093 "C. Moretto"^^xsd:string . + +wds:Q57160439-Q57160439-P2093-4 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + pq:P1545 "4"^^xsd:string ; + ps:P2093 "E. Guenzi"^^xsd:string . + +wds:Q57160439-Q57160439-P2093-5 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + pq:P1545 "5"^^xsd:string ; + ps:P2093 "L. Cousens"^^xsd:string . + +wds:Q57160439-Q57160439-P2093-6 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + pq:P1545 "6"^^xsd:string ; + ps:P2093 "M. Chin"^^xsd:string . + +wds:Q57160439-Q57160439-P2093-7 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + pq:P1545 "7"^^xsd:string ; + ps:P2093 "C. Dong"^^xsd:string . + +wds:Q57160439-Q57160439-P2093-8 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + pq:P1545 "8"^^xsd:string ; + ps:P2093 "A. J. Weiner"^^xsd:string . + +wds:Q57160439-Q57160439-P2093-9 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + pq:P1545 "9"^^xsd:string ; + ps:P2093 "J. Y. Lau"^^xsd:string . + +wds:Q57160439-Q57160439-P304-1 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P304 "1759-1763"^^xsd:string . + +wds:Q57160439-Q57160439-P31-1 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P31 wd:Q13442814 . + +wds:Q57160439-Q57160439-P356-1 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P356 "10.1073/PNAS.93.5.1759"^^xsd:string . + +wds:Q57160439-Q57160439-P407-1 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P407 wd:Q1860 . + +wds:Q57160439-Q57160439-P433-1 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P433 "5"^^xsd:string . + +wds:Q57160439-Q57160439-P478-1 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P478 "93"^^xsd:string . + +wds:Q57160439-Q57160439-P577-1 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P577 "1996-03-05T00:00:00"^^xsd:dateTime ; + psv:P577 wdv:Timec1996-03-05T000000cQc11c0 . + +wds:Q57160439-Q57160439-P921-1 a wikibase:Statement ; + wikibase:rank wikibase:BestRank ; + ; + ps:P921 wd:Q79460 . + +wdv:Timec1996-03-05T000000cQc11c0 a wikibase:Time ; + wikibase:timeCalendarModel wd:Q1985727 ; + wikibase:timePrecision 11 ; + wikibase:timeTimezone 0 ; + wikibase:timeValue "1996-03-05T00:00:00"^^xsd:dateTime . + +wd:Q1146531 a wikibase:Item . + +wd:Q13442814 a wikibase:Item . + +wd:Q1860 a wikibase:Item . + +wd:Q79460 a wikibase:Item . 
+ diff --git a/kgtk/tests/data/wikidata_properties.tsv b/kgtk/tests/data/wikidata_properties.tsv new file mode 100644 index 000000000..687d3175e --- /dev/null +++ b/kgtk/tests/data/wikidata_properties.tsv @@ -0,0 +1,7440 @@ +node1 label node2 +P493 property_type external-identifier +P494 property_type external-identifier +P495 property_type item +P496 property_type external-identifier +P497 property_type external-identifier +P498 property_type external-identifier +P500 property_type item +P501 property_type item +P502 property_type string +P503 property_type external-identifier +P504 property_type item +P505 property_type item +P506 property_type external-identifier +P507 property_type external-identifier +P508 property_type external-identifier +P509 property_type item +P511 property_type item +P512 property_type item +P514 property_type item +P515 property_type item +P516 property_type item +P517 property_type item +P518 property_type item +P520 property_type item +P521 property_type item +P522 property_type item +P523 property_type item +P524 property_type item +P525 property_type external-identifier +P527 property_type item +P528 property_type string +P529 property_type string +P530 property_type item +P531 property_type item +P532 property_type item +P533 property_type item +P534 property_type item +P535 property_type external-identifier +P536 property_type external-identifier +P537 property_type item +P538 property_type item +P539 property_type external-identifier +P541 property_type item +P542 property_type item +P543 property_type item +P545 property_type item +P546 property_type item +P547 property_type item +P548 property_type item +P549 property_type external-identifier +P550 property_type item +P551 property_type item +P552 property_type item +P553 property_type item +P554 property_type string +P555 property_type string +P556 property_type item +P557 property_type external-identifier +P559 property_type item +P560 property_type item +P561 property_type string +P562 property_type item +P563 property_type external-identifier +P564 property_type string +P565 property_type item +P566 property_type item +P567 property_type item +P568 property_type item +P569 property_type time +P570 property_type time +P571 property_type time +P574 property_type time +P575 property_type time +P576 property_type time +P577 property_type time +P578 property_type time +P579 property_type item +P580 property_type time +P582 property_type time +P585 property_type time +P586 property_type external-identifier +P587 property_type external-identifier +P588 property_type item +P589 property_type item +P590 property_type external-identifier +P591 property_type string +P592 property_type external-identifier +P593 property_type string +P594 property_type external-identifier +P595 property_type external-identifier +P597 property_type external-identifier +P598 property_type item +P599 property_type external-identifier +P600 property_type external-identifier +P604 property_type external-identifier +P605 property_type external-identifier +P606 property_type time +P607 property_type item +P608 property_type item +P609 property_type item +P610 property_type item +P611 property_type item +P612 property_type item +P613 property_type string +P617 property_type string +P618 property_type item +P619 property_type time +P620 property_type time +P621 property_type time +P622 property_type time +P624 property_type item +P625 property_type globe-coordinate +P626 property_type globe-coordinate +P627 property_type string +P628 
property_type external-identifier +P629 property_type item +P630 property_type external-identifier +P631 property_type item +P632 property_type external-identifier +P633 property_type external-identifier +P634 property_type item +P635 property_type external-identifier +P636 property_type item +P637 property_type external-identifier +P638 property_type external-identifier +P639 property_type external-identifier +P640 property_type external-identifier +P641 property_type item +P642 property_type item +P644 property_type string +P645 property_type string +P646 property_type external-identifier +P647 property_type item +P648 property_type external-identifier +P649 property_type external-identifier +P650 property_type external-identifier +P651 property_type external-identifier +P652 property_type external-identifier +P653 property_type external-identifier +P654 property_type item +P655 property_type item +P656 property_type string +P657 property_type external-identifier +P658 property_type item +P659 property_type item +P660 property_type item +P661 property_type external-identifier +P662 property_type external-identifier +P663 property_type external-identifier +P664 property_type item +P665 property_type external-identifier +P667 property_type string +P668 property_type external-identifier +P669 property_type item +P670 property_type string +P671 property_type external-identifier +P672 property_type external-identifier +P673 property_type external-identifier +P674 property_type item +P675 property_type external-identifier +P676 property_type item +P677 property_type external-identifier +P678 property_type item +P679 property_type external-identifier +P680 property_type item +P681 property_type item +P682 property_type item +P683 property_type external-identifier +P684 property_type item +P685 property_type external-identifier +P686 property_type external-identifier +P687 property_type external-identifier +P688 property_type item +P689 property_type item +P690 property_type item +P691 property_type external-identifier +P692 property_type string +P693 property_type item +P694 property_type item +P695 property_type external-identifier +P696 property_type external-identifier +P697 property_type item +P698 property_type external-identifier +P699 property_type external-identifier +P700 property_type external-identifier +P701 property_type external-identifier +P702 property_type item +P703 property_type item +P704 property_type external-identifier +P705 property_type external-identifier +P706 property_type item +P707 property_type item +P708 property_type item +P709 property_type external-identifier +P710 property_type item +P711 property_type string +P712 property_type string +P713 property_type string +P714 property_type external-identifier +P715 property_type external-identifier +P360 property_type item +P361 property_type item +P364 property_type item +P366 property_type item +P367 property_type string +P368 property_type string +P369 property_type item +P370 property_type string +P371 property_type item +P373 property_type string +P374 property_type external-identifier +P375 property_type item +P376 property_type item +P377 property_type external-identifier +P380 property_type external-identifier +P381 property_type external-identifier +P382 property_type external-identifier +P393 property_type string +P395 property_type string +P396 property_type external-identifier +P397 property_type item +P398 property_type item +P399 property_type item +P400 property_type item +P402 property_type 
external-identifier +P403 property_type item +P404 property_type item +P405 property_type item +P406 property_type item +P407 property_type item +P408 property_type item +P409 property_type external-identifier +P410 property_type item +P411 property_type item +P412 property_type item +P413 property_type item +P414 property_type item +P415 property_type item +P416 property_type string +P417 property_type item +P418 property_type item +P421 property_type item +P423 property_type item +P424 property_type string +P425 property_type item +P426 property_type string +P427 property_type item +P428 property_type external-identifier +P429 property_type external-identifier +P432 property_type external-identifier +P433 property_type string +P434 property_type external-identifier +P435 property_type external-identifier +P436 property_type external-identifier +P437 property_type item +P439 property_type external-identifier +P440 property_type external-identifier +P442 property_type external-identifier +P443 property_type string +P444 property_type string +P447 property_type item +P449 property_type item +P450 property_type item +P451 property_type item +P452 property_type item +P453 property_type item +P454 property_type external-identifier +P455 property_type external-identifier +P457 property_type item +P458 property_type external-identifier +P459 property_type item +P460 property_type item +P461 property_type item +P462 property_type item +P463 property_type item +P464 property_type external-identifier +P465 property_type string +P466 property_type item +P467 property_type item +P468 property_type item +P469 property_type item +P470 property_type item +P473 property_type string +P474 property_type string +P476 property_type external-identifier +P477 property_type external-identifier +P478 property_type string +P479 property_type item +P480 property_type external-identifier +P481 property_type external-identifier +P483 property_type item +P484 property_type external-identifier +P485 property_type item +P486 property_type external-identifier +P487 property_type string +P488 property_type item +P489 property_type item +P490 property_type string +P491 property_type string +P492 property_type external-identifier +P179 property_type item +P180 property_type item +P181 property_type string +P183 property_type item +P184 property_type item +P185 property_type item +P186 property_type item +P189 property_type item +P190 property_type item +P193 property_type item +P194 property_type item +P195 property_type item +P196 property_type item +P197 property_type item +P199 property_type item +P200 property_type item +P201 property_type item +P205 property_type item +P206 property_type item +P207 property_type string +P208 property_type item +P209 property_type item +P210 property_type item +P212 property_type external-identifier +P213 property_type external-identifier +P214 property_type external-identifier +P215 property_type string +P217 property_type string +P218 property_type external-identifier +P219 property_type external-identifier +P220 property_type external-identifier +P221 property_type external-identifier +P223 property_type string +P225 property_type string +P227 property_type external-identifier +P229 property_type string +P230 property_type string +P231 property_type external-identifier +P232 property_type external-identifier +P233 property_type string +P234 property_type external-identifier +P235 property_type external-identifier +P236 property_type external-identifier +P237 property_type item +P238 
property_type string +P239 property_type string +P240 property_type string +P241 property_type item +P242 property_type string +P243 property_type external-identifier +P244 property_type external-identifier +P245 property_type external-identifier +P246 property_type string +P247 property_type external-identifier +P248 property_type item +P249 property_type string +P263 property_type item +P264 property_type item +P267 property_type external-identifier +P268 property_type external-identifier +P269 property_type external-identifier +P270 property_type external-identifier +P271 property_type external-identifier +P272 property_type item +P274 property_type string +P275 property_type item +P276 property_type item +P277 property_type item +P278 property_type external-identifier +P279 property_type item +P281 property_type string +P282 property_type item +P286 property_type item +P287 property_type item +P289 property_type item +P291 property_type item +P296 property_type string +P297 property_type external-identifier +P298 property_type external-identifier +P299 property_type external-identifier +P300 property_type external-identifier +P301 property_type item +P303 property_type external-identifier +P304 property_type string +P305 property_type external-identifier +P306 property_type item +P344 property_type item +P345 property_type external-identifier +P347 property_type external-identifier +P348 property_type string +P349 property_type external-identifier +P350 property_type external-identifier +P351 property_type external-identifier +P352 property_type external-identifier +P353 property_type external-identifier +P354 property_type external-identifier +P355 property_type item +P356 property_type external-identifier +P358 property_type item +P359 property_type external-identifier +P6 property_type item +P10 property_type string +P14 property_type string +P15 property_type string +P16 property_type item +P17 property_type item +P18 property_type string +P19 property_type item +P20 property_type item +P21 property_type item +P22 property_type item +P25 property_type item +P26 property_type item +P27 property_type item +P30 property_type item +P31 property_type item +P35 property_type item +P36 property_type item +P37 property_type item +P38 property_type item +P39 property_type item +P40 property_type item +P41 property_type string +P47 property_type item +P50 property_type item +P51 property_type string +P53 property_type item +P54 property_type item +P57 property_type item +P58 property_type item +P59 property_type item +P61 property_type item +P65 property_type item +P66 property_type item +P69 property_type item +P78 property_type item +P81 property_type item +P84 property_type item +P85 property_type item +P86 property_type item +P87 property_type item +P88 property_type item +P91 property_type item +P92 property_type item +P94 property_type string +P97 property_type item +P98 property_type item +P101 property_type item +P102 property_type item +P103 property_type item +P105 property_type item +P106 property_type item +P108 property_type item +P109 property_type string +P110 property_type item +P111 property_type item +P112 property_type item +P113 property_type item +P114 property_type item +P115 property_type item +P117 property_type string +P118 property_type item +P119 property_type item +P121 property_type item +P122 property_type item +P123 property_type item +P126 property_type item +P127 property_type item +P128 property_type item +P129 property_type item +P131 property_type item 
+P135 property_type item +P136 property_type item +P137 property_type item +P138 property_type item +P140 property_type item +P141 property_type item +P143 property_type item +P144 property_type item +P149 property_type item +P150 property_type item +P154 property_type string +P155 property_type item +P156 property_type item +P157 property_type item +P158 property_type string +P159 property_type item +P161 property_type item +P162 property_type item +P163 property_type item +P166 property_type item +P167 property_type item +P169 property_type item +P170 property_type item +P171 property_type item +P172 property_type item +P175 property_type item +P176 property_type item +P177 property_type item +P178 property_type item +P716 property_type external-identifier +P717 property_type external-identifier +P718 property_type external-identifier +P720 property_type item +P721 property_type external-identifier +P722 property_type external-identifier +P723 property_type external-identifier +P724 property_type external-identifier +P725 property_type item +P726 property_type item +P729 property_type time +P730 property_type time +P731 property_type external-identifier +P732 property_type external-identifier +P733 property_type external-identifier +P734 property_type item +P735 property_type item +P736 property_type item +P737 property_type item +P739 property_type item +P740 property_type item +P741 property_type item +P742 property_type string +P744 property_type item +P745 property_type external-identifier +P746 property_type time +P747 property_type item +P748 property_type item +P749 property_type item +P750 property_type item +P751 property_type item +P756 property_type item +P757 property_type external-identifier +P758 property_type external-identifier +P759 property_type external-identifier +P760 property_type external-identifier +P761 property_type external-identifier +P762 property_type external-identifier +P763 property_type external-identifier +P764 property_type external-identifier +P765 property_type item +P767 property_type item +P768 property_type item +P769 property_type item +P770 property_type item +P771 property_type external-identifier +P772 property_type external-identifier +P773 property_type external-identifier +P774 property_type external-identifier +P775 property_type external-identifier +P776 property_type external-identifier +P777 property_type external-identifier +P778 property_type external-identifier +P779 property_type external-identifier +P780 property_type item +P781 property_type external-identifier +P782 property_type external-identifier +P783 property_type item +P784 property_type item +P785 property_type item +P786 property_type item +P787 property_type item +P788 property_type item +P789 property_type item +P790 property_type item +P791 property_type string +P792 property_type string +P793 property_type item +P795 property_type item +P797 property_type item +P798 property_type string +P799 property_type string +P800 property_type item +P802 property_type item +P803 property_type item +P804 property_type external-identifier +P805 property_type item +P806 property_type external-identifier +P807 property_type item +P808 property_type external-identifier +P809 property_type external-identifier +P811 property_type item +P812 property_type item +P813 property_type time +P814 property_type item +P815 property_type external-identifier +P816 property_type item +P817 property_type item +P818 property_type external-identifier +P819 property_type external-identifier +P820 
property_type string +P821 property_type external-identifier +P822 property_type item +P823 property_type item +P824 property_type external-identifier +P825 property_type item +P826 property_type item +P827 property_type external-identifier +P828 property_type item +P829 property_type external-identifier +P2131 property_type quantity +P2132 property_type quantity +P2133 property_type quantity +P2134 property_type quantity +P2135 property_type quantity +P2136 property_type quantity +P2137 property_type quantity +P2138 property_type quantity +P2139 property_type quantity +P2140 property_type quantity +P2141 property_type quantity +P2142 property_type quantity +P2143 property_type quantity +P2144 property_type quantity +P2145 property_type quantity +P2146 property_type quantity +P2147 property_type quantity +P2148 property_type quantity +P2149 property_type quantity +P2150 property_type quantity +P2151 property_type quantity +P2152 property_type item +P2153 property_type external-identifier +P2154 property_type quantity +P2155 property_type item +P2156 property_type item +P2158 property_type external-identifier +P2159 property_type item +P2160 property_type quantity +P2161 property_type external-identifier +P2162 property_type external-identifier +P2163 property_type external-identifier +P2164 property_type external-identifier +P2165 property_type external-identifier +P2166 property_type external-identifier +P2167 property_type external-identifier +P2168 property_type external-identifier +P2169 property_type external-identifier +P2170 property_type external-identifier +P2171 property_type external-identifier +P2172 property_type external-identifier +P2173 property_type external-identifier +P2174 property_type external-identifier +P2175 property_type item +P2176 property_type item +P2177 property_type quantity +P2178 property_type item +P2179 property_type string +P2180 property_type external-identifier +P2181 property_type external-identifier +P2182 property_type external-identifier +P2183 property_type string +P2184 property_type item +P2185 property_type external-identifier +P2186 property_type external-identifier +P2187 property_type external-identifier +P2188 property_type external-identifier +P2189 property_type external-identifier +P2190 property_type external-identifier +P2191 property_type external-identifier +P2192 property_type external-identifier +P2193 property_type external-identifier +P2194 property_type external-identifier +P2195 property_type external-identifier +P2196 property_type quantity +P2197 property_type quantity +P2198 property_type quantity +P2199 property_type quantity +P2200 property_type quantity +P2201 property_type quantity +P2202 property_type quantity +P2203 property_type quantity +P2204 property_type quantity +P2205 property_type external-identifier +P2206 property_type external-identifier +P2207 property_type external-identifier +P2208 property_type quantity +P2209 property_type external-identifier +P2210 property_type item +P2211 property_type quantity +P2212 property_type quantity +P2213 property_type quantity +P2214 property_type quantity +P2215 property_type quantity +P2216 property_type quantity +P2217 property_type quantity +P2218 property_type quantity +P2219 property_type quantity +P2220 property_type quantity +P2221 property_type quantity +P2222 property_type quantity +P2223 property_type quantity +P2225 property_type quantity +P2226 property_type quantity +P2227 property_type quantity +P2228 property_type quantity +P2229 property_type quantity 
+P2230 property_type quantity +P2231 property_type quantity +P2232 property_type quantity +P1913 property_type item +P1914 property_type item +P1915 property_type item +P1916 property_type item +P1917 property_type item +P1918 property_type item +P1919 property_type external-identifier +P1920 property_type external-identifier +P1921 property_type string +P1922 property_type monolingualtext +P1923 property_type item +P1924 property_type item +P1925 property_type external-identifier +P1928 property_type external-identifier +P1929 property_type external-identifier +P1930 property_type external-identifier +P1931 property_type string +P1932 property_type string +P1933 property_type external-identifier +P1934 property_type external-identifier +P1935 property_type external-identifier +P1936 property_type external-identifier +P1937 property_type external-identifier +P1938 property_type external-identifier +P1939 property_type external-identifier +P1940 property_type external-identifier +P1942 property_type string +P1943 property_type string +P1944 property_type string +P1945 property_type string +P1947 property_type external-identifier +P1948 property_type external-identifier +P1949 property_type external-identifier +P1950 property_type item +P1951 property_type item +P1952 property_type external-identifier +P1953 property_type external-identifier +P1954 property_type external-identifier +P1955 property_type external-identifier +P1956 property_type item +P1957 property_type url +P1958 property_type external-identifier +P1959 property_type external-identifier +P1960 property_type external-identifier +P1961 property_type external-identifier +P1963 property_type string +P1966 property_type external-identifier +P1967 property_type external-identifier +P1968 property_type external-identifier +P1969 property_type external-identifier +P1970 property_type external-identifier +P1971 property_type quantity +P1972 property_type external-identifier +P1973 property_type external-identifier +P1976 property_type external-identifier +P1977 property_type external-identifier +P1978 property_type external-identifier +P1979 property_type external-identifier +P1980 property_type external-identifier +P1981 property_type item +P1982 property_type external-identifier +P1983 property_type external-identifier +P1984 property_type external-identifier +P1985 property_type external-identifier +P1986 property_type external-identifier +P1987 property_type string +P1988 property_type external-identifier +P1989 property_type external-identifier +P1990 property_type item +P1991 property_type url +P1992 property_type external-identifier +P1993 property_type string +P1994 property_type external-identifier +P1995 property_type item +P1996 property_type external-identifier +P1997 property_type external-identifier +P1998 property_type string +P1999 property_type item +P2000 property_type external-identifier +P2001 property_type string +P2002 property_type external-identifier +P2003 property_type external-identifier +P2004 property_type external-identifier +P2005 property_type external-identifier +P2006 property_type external-identifier +P2007 property_type external-identifier +P2008 property_type external-identifier +P2009 property_type string +P2010 property_type string +P2011 property_type external-identifier +P2012 property_type item +P2013 property_type external-identifier +P2014 property_type external-identifier +P2015 property_type external-identifier +P2016 property_type external-identifier +P2017 property_type string +P2018 
property_type external-identifier +P2019 property_type external-identifier +P2020 property_type external-identifier +P2021 property_type quantity +P2777 property_type external-identifier +P2778 property_type external-identifier +P2779 property_type external-identifier +P2780 property_type external-identifier +P2781 property_type quantity +P2782 property_type external-identifier +P2783 property_type external-identifier +P2784 property_type item +P2786 property_type globe-coordinate +P2787 property_type quantity +P2788 property_type external-identifier +P2789 property_type item +P2790 property_type quantity +P2791 property_type quantity +P2792 property_type external-identifier +P2793 property_type quantity +P2794 property_type external-identifier +P2795 property_type monolingualtext +P2796 property_type external-identifier +P2797 property_type quantity +P2798 property_type external-identifier +P2799 property_type external-identifier +P2800 property_type external-identifier +P2801 property_type external-identifier +P2802 property_type string +P2803 property_type quantity +P2804 property_type external-identifier +P2805 property_type external-identifier +P2806 property_type quantity +P2807 property_type quantity +P2808 property_type quantity +P2809 property_type external-identifier +P2810 property_type external-identifier +P2811 property_type external-identifier +P2812 property_type external-identifier +P2813 property_type item +P2814 property_type external-identifier +P2815 property_type external-identifier +P2816 property_type external-identifier +P2817 property_type item +P2818 property_type external-identifier +P2819 property_type external-identifier +P2820 property_type item +P2821 property_type item +P2822 property_type item +P2823 property_type external-identifier +P2824 property_type external-identifier +P2825 property_type item +P2826 property_type external-identifier +P2827 property_type item +P2828 property_type item +P2829 property_type external-identifier +P2830 property_type external-identifier +P2831 property_type item +P2832 property_type external-identifier +P2833 property_type external-identifier +P2834 property_type quantity +P2835 property_type quantity +P2836 property_type quantity +P2838 property_type item +P2839 property_type item +P2840 property_type external-identifier +P2841 property_type item +P2842 property_type item +P2843 property_type external-identifier +P2844 property_type quantity +P2845 property_type external-identifier +P2846 property_type item +P2847 property_type external-identifier +P2848 property_type item +P2849 property_type item +P2850 property_type external-identifier +P2851 property_type item +P2852 property_type item +P2853 property_type item +P2854 property_type quantity +P2855 property_type quantity +P2856 property_type external-identifier +P2857 property_type external-identifier +P2858 property_type external-identifier +P2859 property_type string +P2860 property_type item +P2861 property_type external-identifier +P2862 property_type external-identifier +P2863 property_type external-identifier +P2864 property_type external-identifier +P2865 property_type external-identifier +P2866 property_type external-identifier +P2867 property_type external-identifier +P2868 property_type item +P2869 property_type item +P2870 property_type external-identifier +P2871 property_type external-identifier +P2872 property_type item +P2873 property_type quantity +P2874 property_type external-identifier +P2875 property_type item +P2876 property_type item +P2877 
property_type external-identifier +P2878 property_type external-identifier +P2879 property_type external-identifier +P2880 property_type external-identifier +P2881 property_type item +P2882 property_type item +P2883 property_type external-identifier +P2884 property_type quantity +P2886 property_type external-identifier +P2887 property_type external-identifier +P2888 property_type url +P2889 property_type external-identifier +P2892 property_type external-identifier +P2893 property_type string +P2894 property_type item +P2895 property_type quantity +P2896 property_type quantity +P2897 property_type external-identifier +P2898 property_type external-identifier +P2899 property_type quantity +P2900 property_type string +P2903 property_type external-identifier +P2904 property_type external-identifier +P2905 property_type external-identifier +P2907 property_type quantity +P2908 property_type external-identifier +P2909 property_type external-identifier +P2910 property_type string +P2911 property_type quantity +P2912 property_type item +P2913 property_type time +P2914 property_type external-identifier +P2915 property_type external-identifier +P2916 property_type monolingualtext +P2917 property_type external-identifier +P2918 property_type string +P2919 property_type string +P2922 property_type item +P2923 property_type quantity +P2924 property_type external-identifier +P2925 property_type item +P2926 property_type external-identifier +P2927 property_type quantity +P2928 property_type quantity +P2929 property_type quantity +P2930 property_type external-identifier +P2931 property_type external-identifier +P2935 property_type item +P2936 property_type item +P2937 property_type item +P2938 property_type external-identifier +P2939 property_type external-identifier +P2940 property_type external-identifier +P2941 property_type external-identifier +P2942 property_type external-identifier +P2943 property_type external-identifier +P2944 property_type external-identifier +P2945 property_type external-identifier +P2946 property_type external-identifier +P2948 property_type external-identifier +P2949 property_type external-identifier +P2950 property_type external-identifier +P2951 property_type external-identifier +P2952 property_type external-identifier +P2953 property_type external-identifier +P2954 property_type external-identifier +P2955 property_type quantity +P2956 property_type external-identifier +P2957 property_type quantity +P2959 property_type item +P2960 property_type time +P2961 property_type external-identifier +P2962 property_type item +P2963 property_type external-identifier +P2964 property_type item +P2965 property_type external-identifier +P2966 property_type external-identifier +P2967 property_type external-identifier +P2968 property_type external-identifier +P2969 property_type external-identifier +P2970 property_type external-identifier +P2971 property_type external-identifier +P2972 property_type external-identifier +P2973 property_type external-identifier +P2974 property_type item +P2975 property_type item +P2976 property_type item +P2977 property_type external-identifier +P2978 property_type item +P2979 property_type string +P2980 property_type external-identifier +P2981 property_type external-identifier +P2982 property_type external-identifier +P2983 property_type external-identifier +P2984 property_type external-identifier +P2985 property_type external-identifier +P2986 property_type external-identifier +P2987 property_type external-identifier +P2988 property_type external-identifier 
+P2989 property_type item +P2990 property_type external-identifier +P2991 property_type external-identifier +P2233 property_type quantity +P2234 property_type quantity +P2235 property_type url +P2236 property_type url +P2238 property_type item +P2239 property_type item +P2240 property_type quantity +P2241 property_type item +P2242 property_type external-identifier +P2243 property_type quantity +P2244 property_type quantity +P2248 property_type quantity +P2249 property_type external-identifier +P2250 property_type quantity +P2252 property_type external-identifier +P2253 property_type external-identifier +P2254 property_type quantity +P2255 property_type external-identifier +P2257 property_type quantity +P2258 property_type string +P2259 property_type string +P2260 property_type quantity +P2261 property_type quantity +P2262 property_type quantity +P2263 property_type string +P2264 property_type external-identifier +P2266 property_type external-identifier +P2267 property_type external-identifier +P2268 property_type external-identifier +P2270 property_type external-identifier +P2271 property_type string +P2272 property_type external-identifier +P2273 property_type external-identifier +P2275 property_type monolingualtext +P2276 property_type external-identifier +P2277 property_type external-identifier +P2278 property_type external-identifier +P2279 property_type item +P2280 property_type external-identifier +P2281 property_type external-identifier +P2282 property_type external-identifier +P2283 property_type item +P2284 property_type quantity +P2285 property_type time +P2286 property_type item +P2287 property_type external-identifier +P2288 property_type item +P2289 property_type item +P2290 property_type external-identifier +P2291 property_type item +P2292 property_type quantity +P2293 property_type item +P2294 property_type quantity +P2295 property_type quantity +P2296 property_type quantity +P2297 property_type quantity +P2298 property_type external-identifier +P2299 property_type quantity +P2300 property_type quantity +P2302 property_type item +P2303 property_type item +P2304 property_type string +P2305 property_type item +P2306 property_type string +P2307 property_type string +P2308 property_type item +P2309 property_type item +P2310 property_type time +P2311 property_type time +P2312 property_type quantity +P2313 property_type quantity +P2315 property_type monolingualtext +P2316 property_type item +P2317 property_type string +P2318 property_type item +P2319 property_type item +P2320 property_type quantity +P2321 property_type item +P2322 property_type string +P2323 property_type external-identifier +P2324 property_type quantity +P2325 property_type quantity +P2326 property_type external-identifier +P2327 property_type external-identifier +P2328 property_type external-identifier +P2329 property_type item +P2330 property_type external-identifier +P2331 property_type external-identifier +P2332 property_type external-identifier +P2333 property_type external-identifier +P2334 property_type external-identifier +P2335 property_type external-identifier +P2336 property_type external-identifier +P2337 property_type external-identifier +P2338 property_type external-identifier +P2339 property_type external-identifier +P2340 property_type external-identifier +P2341 property_type item +P2342 property_type external-identifier +P2343 property_type string +P2024 property_type external-identifier +P2025 property_type external-identifier +P2026 property_type external-identifier +P2027 property_type 
external-identifier +P2028 property_type external-identifier +P2029 property_type external-identifier +P2030 property_type external-identifier +P2031 property_type time +P2032 property_type time +P2033 property_type item +P2034 property_type external-identifier +P2036 property_type external-identifier +P2037 property_type external-identifier +P2038 property_type external-identifier +P2040 property_type external-identifier +P2041 property_type external-identifier +P2042 property_type external-identifier +P2043 property_type quantity +P2044 property_type quantity +P2045 property_type quantity +P2046 property_type quantity +P2047 property_type quantity +P2048 property_type quantity +P2049 property_type quantity +P2050 property_type quantity +P2051 property_type quantity +P2052 property_type quantity +P2053 property_type quantity +P2054 property_type quantity +P2055 property_type quantity +P2056 property_type quantity +P2057 property_type external-identifier +P2058 property_type item +P2060 property_type quantity +P2061 property_type item +P2062 property_type external-identifier +P2063 property_type external-identifier +P2064 property_type external-identifier +P2065 property_type external-identifier +P2066 property_type quantity +P2067 property_type quantity +P2068 property_type quantity +P2069 property_type quantity +P2070 property_type external-identifier +P2071 property_type external-identifier +P2072 property_type external-identifier +P2073 property_type quantity +P2074 property_type external-identifier +P2075 property_type quantity +P2076 property_type quantity +P2077 property_type quantity +P2078 property_type url +P2079 property_type item +P2080 property_type external-identifier +P2081 property_type external-identifier +P2082 property_type external-identifier +P2083 property_type external-identifier +P2084 property_type external-identifier +P2085 property_type external-identifier +P2086 property_type external-identifier +P2087 property_type external-identifier +P2088 property_type external-identifier +P2089 property_type external-identifier +P2090 property_type external-identifier +P2091 property_type external-identifier +P2092 property_type external-identifier +P2093 property_type string +P2094 property_type item +P2095 property_type item +P2096 property_type monolingualtext +P2097 property_type quantity +P2098 property_type item +P2099 property_type external-identifier +P2100 property_type external-identifier +P2101 property_type quantity +P2102 property_type quantity +P2103 property_type quantity +P2105 property_type quantity +P2106 property_type external-identifier +P2107 property_type quantity +P2108 property_type external-identifier +P2109 property_type quantity +P2112 property_type quantity +P2113 property_type quantity +P2114 property_type quantity +P2115 property_type external-identifier +P2116 property_type quantity +P2117 property_type quantity +P2118 property_type quantity +P2119 property_type quantity +P2120 property_type quantity +P2121 property_type quantity +P2123 property_type external-identifier +P2124 property_type quantity +P2125 property_type string +P2126 property_type string +P2127 property_type item +P2128 property_type quantity +P2129 property_type quantity +P2130 property_type quantity +P2344 property_type external-identifier +P2345 property_type external-identifier +P2346 property_type external-identifier +P2347 property_type external-identifier +P2348 property_type item +P2349 property_type external-identifier +P2350 property_type external-identifier +P2351 
+P2352 property_type item
+P2353 property_type item
+P2354 property_type item
+P2355 property_type external-identifier
+P2357 property_type string
+P2358 property_type item
+P2359 property_type item
+P2360 property_type item
+P2361 property_type item
+P2362 property_type quantity
+P2363 property_type item
+P2364 property_type string
+P2365 property_type item
+P2366 property_type item
+P2367 property_type external-identifier
+P2368 property_type string
+P2369 property_type external-identifier
+P2370 property_type quantity
+P2371 property_type item
+P2372 property_type external-identifier
+P2373 property_type external-identifier
+P2374 property_type quantity
+P2375 property_type item
+P2376 property_type item
+P2377 property_type item
+P2378 property_type item
+P2379 property_type item
+P2380 property_type external-identifier
+P2381 property_type external-identifier
+P2382 property_type external-identifier
+P2383 property_type external-identifier
+P2384 property_type item
+P2385 property_type external-identifier
+P2386 property_type quantity
+P2387 property_type external-identifier
+P2388 property_type item
+P2389 property_type item
+P2390 property_type external-identifier
+P2391 property_type external-identifier
+P2392 property_type item
+P2393 property_type external-identifier
+P2394 property_type external-identifier
+P2396 property_type item
+P2397 property_type external-identifier
+P2398 property_type external-identifier
+P2399 property_type external-identifier
+P2400 property_type external-identifier
+P2401 property_type external-identifier
+P2402 property_type quantity
+P2403 property_type quantity
+P2404 property_type quantity
+P2405 property_type quantity
+P2406 property_type quantity
+P2407 property_type quantity
+P2408 property_type item
+P2409 property_type external-identifier
+P2410 property_type string
+P2411 property_type string
+P2412 property_type external-identifier
+P2413 property_type external-identifier
+P2414 property_type item
+P2415 property_type quantity
+P2416 property_type item
+P2417 property_type item
+P2418 property_type external-identifier
+P2421 property_type external-identifier
+P2423 property_type external-identifier
+P2424 property_type external-identifier
+P2425 property_type string
+P2426 property_type external-identifier
+P2427 property_type external-identifier
+P2428 property_type external-identifier
+P2429 property_type item
+P2430 property_type quantity
+P2431 property_type external-identifier
+P2432 property_type external-identifier
+P2433 property_type item
+P2434 property_type external-identifier
+P2435 property_type external-identifier
+P2436 property_type quantity
+P2437 property_type quantity
+P2438 property_type item
+P2440 property_type string
+P2441 property_type monolingualtext
+P2442 property_type quantity
+P2443 property_type item
+P2444 property_type item
+P2445 property_type item
+P2446 property_type external-identifier
+P2447 property_type external-identifier
+P2448 property_type external-identifier
+P2449 property_type external-identifier
+P2992 property_type item
+P2993 property_type quantity
+P2997 property_type quantity
+P2998 property_type quantity
+P2999 property_type quantity
+P3000 property_type quantity
+P3001 property_type quantity
+P3002 property_type external-identifier
+P3003 property_type external-identifier
+P3004 property_type external-identifier
+P3005 property_type item
+P3006 property_type external-identifier
+P3007 property_type external-identifier
+P3008 property_type external-identifier
+P3009 property_type external-identifier
+P3010 property_type external-identifier
+P3012 property_type external-identifier
+P3013 property_type quantity
+P3014 property_type item
+P3015 property_type item
+P3016 property_type external-identifier
+P3017 property_type external-identifier
+P3018 property_type item
+P3019 property_type item
+P3020 property_type quantity
+P3021 property_type external-identifier
+P3022 property_type item
+P3023 property_type external-identifier
+P3024 property_type external-identifier
+P3025 property_type item
+P3026 property_type item
+P3027 property_type item
+P3028 property_type item
+P3029 property_type external-identifier
+P3030 property_type string
+P3031 property_type external-identifier
+P3032 property_type item
+P3033 property_type item
+P3034 property_type external-identifier
+P3035 property_type external-identifier
+P3036 property_type quantity
+P3037 property_type item
+P3038 property_type external-identifier
+P3039 property_type quantity
+P3040 property_type external-identifier
+P3041 property_type quantity
+P3042 property_type external-identifier
+P3043 property_type external-identifier
+P3044 property_type external-identifier
+P3045 property_type external-identifier
+P3046 property_type external-identifier
+P3047 property_type external-identifier
+P3048 property_type external-identifier
+P3049 property_type external-identifier
+P3050 property_type external-identifier
+P3051 property_type external-identifier
+P3052 property_type external-identifier
+P3053 property_type external-identifier
+P3054 property_type external-identifier
+P3055 property_type external-identifier
+P3056 property_type external-identifier
+P3057 property_type external-identifier
+P3058 property_type external-identifier
+P3059 property_type external-identifier
+P3060 property_type external-identifier
+P3061 property_type external-identifier
+P3063 property_type quantity
+P3064 property_type external-identifier
+P3065 property_type external-identifier
+P3066 property_type external-identifier
+P3067 property_type string
+P3068 property_type external-identifier
+P3069 property_type external-identifier
+P3070 property_type quantity
+P3071 property_type quantity
+P3072 property_type external-identifier
+P3073 property_type external-identifier
+P3074 property_type external-identifier
+P3075 property_type item
+P3076 property_type external-identifier
+P3077 property_type external-identifier
+P3078 property_type quantity
+P3080 property_type item
+P3081 property_type item
+P3082 property_type item
+P3083 property_type external-identifier
+P3085 property_type item
+P3086 property_type quantity
+P3087 property_type quantity
+P3088 property_type external-identifier
+P3089 property_type external-identifier
+P3090 property_type string
+P3091 property_type item
+P3092 property_type item
+P3093 property_type item
+P3094 property_type item
+P3095 property_type item
+P3096 property_type item
+P3097 property_type external-identifier
+P3098 property_type external-identifier
+P830 property_type external-identifier
+P831 property_type item
+P832 property_type item
+P833 property_type item
+P834 property_type item
+P835 property_type string
+P836 property_type external-identifier
+P837 property_type item
+P838 property_type external-identifier
+P839 property_type external-identifier
+P840 property_type item
+P841 property_type item
+P842 property_type external-identifier
+P843 property_type external-identifier
+P844 property_type external-identifier
+P845 property_type external-identifier
+P846 property_type external-identifier
+P847 property_type string
+P849 property_type external-identifier
+P850 property_type external-identifier
+P852 property_type item
+P853 property_type item
+P854 property_type url
+P855 property_type url
+P856 property_type url
+P858 property_type external-identifier
+P859 property_type item
+P860 property_type external-identifier
+P861 property_type external-identifier
+P862 property_type external-identifier
+P863 property_type external-identifier
+P864 property_type external-identifier
+P865 property_type external-identifier
+P866 property_type external-identifier
+P867 property_type external-identifier
+P868 property_type item
+P870 property_type item
+P872 property_type item
+P873 property_type item
+P874 property_type string
+P875 property_type string
+P876 property_type string
+P877 property_type string
+P878 property_type item
+P879 property_type string
+P880 property_type item
+P881 property_type item
+P882 property_type external-identifier
+P884 property_type external-identifier
+P885 property_type item
+P886 property_type external-identifier
+P887 property_type item
+P888 property_type external-identifier
+P889 property_type external-identifier
+P892 property_type external-identifier
+P893 property_type external-identifier
+P894 property_type external-identifier
+P897 property_type string
+P898 property_type string
+P901 property_type external-identifier
+P902 property_type external-identifier
+P905 property_type external-identifier
+P906 property_type external-identifier
+P908 property_type item
+P909 property_type external-identifier
+P910 property_type item
+P911 property_type external-identifier
+P912 property_type item
+P913 property_type item
+P914 property_type item
+P915 property_type item
+P916 property_type item
+P917 property_type external-identifier
+P918 property_type external-identifier
+P919 property_type external-identifier
+P920 property_type string
+P921 property_type item
+P922 property_type item
+P923 property_type item
+P924 property_type item
+P925 property_type item
+P926 property_type item
+P927 property_type item
+P928 property_type item
+P929 property_type item
+P930 property_type item
+P931 property_type item
+P932 property_type external-identifier
+P933 property_type external-identifier
+P935 property_type string
+P937 property_type item
+P938 property_type external-identifier
+P939 property_type external-identifier
+P941 property_type item
+P942 property_type item
+P943 property_type item
+P944 property_type item
+P945 property_type item
+P946 property_type string
+P947 property_type external-identifier
+P4031 property_type external-identifier
+P4032 property_type item
+P4033 property_type external-identifier
+P4034 property_type external-identifier
+P4035 property_type external-identifier
+P4036 property_type quantity
+P4037 property_type external-identifier
+P4038 property_type external-identifier
+P4040 property_type external-identifier
+P4041 property_type external-identifier
+P4042 property_type external-identifier
+P4043 property_type item
+P4044 property_type item
+P4045 property_type string
+P4046 property_type external-identifier
+P4047 property_type string
+P4048 property_type external-identifier
+P4050 property_type external-identifier
+P4051 property_type external-identifier
+P4052 property_type external-identifier
+P4053 property_type external-identifier
+P4054 property_type external-identifier
+P4055 property_type external-identifier
+P4056 property_type external-identifier
+P4057 property_type external-identifier
+P4058 property_type external-identifier
+P4059 property_type external-identifier
+P4060 property_type external-identifier
+P4061 property_type external-identifier
+P4062 property_type external-identifier
+P4063 property_type external-identifier
+P4065 property_type external-identifier
+P4066 property_type external-identifier
+P4067 property_type external-identifier
+P4068 property_type external-identifier
+P4069 property_type external-identifier
+P4070 property_type item
+P4071 property_type external-identifier
+P4072 property_type external-identifier
+P4073 property_type external-identifier
+P4074 property_type external-identifier
+P4075 property_type external-identifier
+P4076 property_type external-identifier
+P4077 property_type external-identifier
+P4078 property_type string
+P4079 property_type external-identifier
+P4080 property_type quantity
+P4081 property_type external-identifier
+P4082 property_type item
+P4083 property_type external-identifier
+P4084 property_type external-identifier
+P4085 property_type external-identifier
+P4086 property_type external-identifier
+P4087 property_type external-identifier
+P4088 property_type external-identifier
+P4089 property_type external-identifier
+P4090 property_type external-identifier
+P4091 property_type string
+P4092 property_type string
+P4093 property_type external-identifier
+P4094 property_type external-identifier
+P4095 property_type external-identifier
+P4096 property_type external-identifier
+P4097 property_type external-identifier
+P4098 property_type external-identifier
+P4099 property_type item
+P4100 property_type item
+P4101 property_type item
+P4102 property_type external-identifier
+P4103 property_type quantity
+P4104 property_type external-identifier
+P4105 property_type quantity
+P4106 property_type external-identifier
+P4107 property_type external-identifier
+P4108 property_type external-identifier
+P4109 property_type external-identifier
+P4110 property_type external-identifier
+P4111 property_type external-identifier
+P4112 property_type external-identifier
+P4113 property_type external-identifier
+P4114 property_type external-identifier
+P4115 property_type external-identifier
+P4116 property_type external-identifier
+P4117 property_type external-identifier
+P4118 property_type external-identifier
+P4119 property_type external-identifier
+P4120 property_type external-identifier
+P4121 property_type external-identifier
+P4122 property_type external-identifier
+P4123 property_type external-identifier
+P4124 property_type external-identifier
+P4125 property_type external-identifier
+P4126 property_type external-identifier
+P4127 property_type external-identifier
+P4128 property_type external-identifier
+P4129 property_type external-identifier
+P4130 property_type external-identifier
+P4131 property_type quantity
+P4132 property_type item
+P4133 property_type external-identifier
+P2450 property_type external-identifier
+P2451 property_type external-identifier
+P2452 property_type external-identifier
+P2453 property_type item
+P2454 property_type external-identifier
+P2455 property_type external-identifier
+P2456 property_type external-identifier
+P2457 property_type external-identifier
+P2458 property_type external-identifier
+P2459 property_type external-identifier
+P2460 property_type external-identifier
+P2461 property_type external-identifier
+P2462 property_type item
+P2463 property_type external-identifier
+P2464 property_type external-identifier
+P2465 property_type external-identifier
+P2467 property_type external-identifier
+P2468 property_type external-identifier
+P2469 property_type external-identifier
+P2470 property_type external-identifier
+P2471 property_type external-identifier
+P2472 property_type external-identifier
+P2473 property_type external-identifier
+P2474 property_type external-identifier
+P2475 property_type external-identifier
+P2476 property_type external-identifier
+P2477 property_type external-identifier
+P2478 property_type external-identifier
+P2479 property_type external-identifier
+P2480 property_type external-identifier
+P2481 property_type external-identifier
+P2482 property_type external-identifier
+P2483 property_type external-identifier
+P2484 property_type external-identifier
+P2485 property_type external-identifier
+P2486 property_type external-identifier
+P2487 property_type external-identifier
+P2488 property_type url
+P2489 property_type external-identifier
+P2490 property_type string
+P2491 property_type external-identifier
+P2492 property_type external-identifier
+P2493 property_type external-identifier
+P2494 property_type external-identifier
+P2496 property_type external-identifier
+P2497 property_type external-identifier
+P2498 property_type external-identifier
+P2499 property_type item
+P2500 property_type item
+P2501 property_type item
+P2502 property_type item
+P2503 property_type external-identifier
+P2504 property_type external-identifier
+P2505 property_type item
+P2506 property_type external-identifier
+P2507 property_type item
+P2508 property_type external-identifier
+P2509 property_type external-identifier
+P2510 property_type external-identifier
+P2511 property_type external-identifier
+P2512 property_type item
+P2513 property_type external-identifier
+P2514 property_type external-identifier
+P2515 property_type item
+P2516 property_type external-identifier
+P2517 property_type item
+P2518 property_type external-identifier
+P2519 property_type external-identifier
+P2520 property_type url
+P2521 property_type monolingualtext
+P2522 property_type item
+P2524 property_type external-identifier
+P2525 property_type external-identifier
+P2526 property_type external-identifier
+P2527 property_type quantity
+P2528 property_type quantity
+P2529 property_type external-identifier
+P2530 property_type external-identifier
+P2531 property_type external-identifier
+P2532 property_type quantity
+P2533 property_type external-identifier
+P2534 property_type string
+P2535 property_type string
+P2536 property_type external-identifier
+P2537 property_type external-identifier
+P2538 property_type external-identifier
+P2539 property_type external-identifier
+P2540 property_type string
+P2541 property_type item
+P2542 property_type quantity
+P2545 property_type item
+P2546 property_type item
+P2547 property_type quantity
+P2548 property_type item
+P2549 property_type external-identifier
+P2550 property_type item
+P2551 property_type item
+P2552 property_type string
+P2553 property_type item
+P2554 property_type item
+P4135 property_type quantity
+P4136 property_type external-identifier
+P4137 property_type quantity
+P4138 property_type external-identifier
+P4139 property_type external-identifier
+P4140 property_type quantity
+P4141 property_type external-identifier
+P4142 property_type external-identifier
+P4143 property_type external-identifier
+P4144 property_type external-identifier
+P4145 property_type external-identifier
+P4146 property_type external-identifier
+P4147 property_type item
+P4149 property_type item
+P4150 property_type string
+P4151 property_type item
+P4152 property_type string
+P4153 property_type quantity
+P4154 property_type external-identifier
+P4155 property_type string
+P4156 property_type external-identifier
+P4157 property_type external-identifier
+P4158 property_type external-identifier
+P4159 property_type external-identifier
+P4160 property_type external-identifier
+P4161 property_type external-identifier
+P4162 property_type external-identifier
+P4163 property_type quantity
+P4164 property_type external-identifier
+P4165 property_type external-identifier
+P4166 property_type external-identifier
+P4167 property_type external-identifier
+P4168 property_type external-identifier
+P4169 property_type external-identifier
+P4170 property_type external-identifier
+P4171 property_type external-identifier
+P4172 property_type external-identifier
+P4173 property_type external-identifier
+P4174 property_type external-identifier
+P4175 property_type external-identifier
+P4176 property_type quantity
+P4177 property_type external-identifier
+P4178 property_type external-identifier
+P4179 property_type string
+P4180 property_type external-identifier
+P4181 property_type external-identifier
+P4182 property_type external-identifier
+P4183 property_type quantity
+P4184 property_type quantity
+P4185 property_type item
+P4186 property_type external-identifier
+P4187 property_type string
+P4188 property_type string
+P4189 property_type string
+P4190 property_type external-identifier
+P4191 property_type external-identifier
+P4192 property_type external-identifier
+P4193 property_type external-identifier
+P4194 property_type external-identifier
+P4195 property_type item
+P4196 property_type string
+P4197 property_type external-identifier
+P4198 property_type external-identifier
+P4199 property_type external-identifier
+P4200 property_type external-identifier
+P4201 property_type external-identifier
+P4202 property_type item
+P4203 property_type external-identifier
+P4204 property_type external-identifier
+P4206 property_type external-identifier
+P4207 property_type external-identifier
+P4208 property_type external-identifier
+P4209 property_type external-identifier
+P4210 property_type external-identifier
+P4211 property_type external-identifier
+P4212 property_type external-identifier
+P4213 property_type string
+P4214 property_type quantity
+P4215 property_type external-identifier
+P4216 property_type external-identifier
+P4217 property_type external-identifier
+P4218 property_type quantity
+P4219 property_type external-identifier
+P4220 property_type item
+P4221 property_type external-identifier
+P4222 property_type external-identifier
+P4223 property_type external-identifier
+P4224 property_type item
+P4225 property_type string
+P4226 property_type external-identifier
+P4227 property_type external-identifier
+P4228 property_type external-identifier
+P4229 property_type external-identifier
+P4230 property_type external-identifier
+P4231 property_type external-identifier
+P4232 property_type external-identifier
+P4233 property_type external-identifier
+P4235 property_type external-identifier
+P4236 property_type external-identifier
+P4238 property_type url
+P2555 property_type quantity
+P2556 property_type quantity
+P2557 property_type quantity
+P2558 property_type external-identifier
+P2559 property_type monolingualtext
+P2560 property_type item
+P2561 property_type monolingualtext
+P2562 property_type monolingualtext
+P2563 property_type item
+P2564 property_type item
+P2565 property_type quantity
+P2566 property_type external-identifier
+P2567 property_type item
+P2568 property_type item
+P2571 property_type item
+P2572 property_type string
+P2573 property_type quantity
+P2574 property_type external-identifier
+P2575 property_type item
+P2576 property_type external-identifier
+P2577 property_type item
+P2578 property_type item
+P2579 property_type item
+P2580 property_type external-identifier
+P2581 property_type external-identifier
+P2582 property_type external-identifier
+P2583 property_type quantity
+P2584 property_type external-identifier
+P2585 property_type external-identifier
+P2586 property_type external-identifier
+P2587 property_type item
+P2588 property_type external-identifier
+P2589 property_type external-identifier
+P2590 property_type external-identifier
+P2591 property_type item
+P2592 property_type external-identifier
+P2593 property_type external-identifier
+P2595 property_type quantity
+P2596 property_type item
+P2597 property_type item
+P2598 property_type string
+P2599 property_type quantity
+P2600 property_type external-identifier
+P2601 property_type external-identifier
+P2602 property_type external-identifier
+P2603 property_type external-identifier
+P2604 property_type external-identifier
+P2605 property_type external-identifier
+P2606 property_type external-identifier
+P2607 property_type external-identifier
+P2610 property_type quantity
+P2611 property_type external-identifier
+P2612 property_type external-identifier
+P2613 property_type external-identifier
+P2614 property_type item
+P2618 property_type external-identifier
+P2619 property_type external-identifier
+P2620 property_type external-identifier
+P2621 property_type external-identifier
+P2622 property_type external-identifier
+P2623 property_type external-identifier
+P2624 property_type external-identifier
+P2625 property_type external-identifier
+P2626 property_type external-identifier
+P2627 property_type external-identifier
+P2628 property_type external-identifier
+P2629 property_type item
+P2630 property_type quantity
+P2631 property_type external-identifier
+P2632 property_type item
+P2633 property_type item
+P2634 property_type item
+P2635 property_type quantity
+P2636 property_type external-identifier
+P2637 property_type item
+P2638 property_type external-identifier
+P2639 property_type external-identifier
+P2640 property_type external-identifier
+P2641 property_type external-identifier
+P2642 property_type external-identifier
+P2643 property_type item
+P2645 property_type quantity
+P2646 property_type external-identifier
+P2647 property_type item
+P2648 property_type external-identifier
+P2649 property_type url
+P2650 property_type item
+P2651 property_type external-identifier
+P2652 property_type item
+P2655 property_type external-identifier
+P2657 property_type external-identifier
+P2658 property_type quantity
+P2659 property_type quantity
+P2660 property_type quantity
+P2661 property_type quantity
+P2662 property_type quantity
+P2663 property_type quantity
+P2664 property_type quantity
+P2665 property_type quantity
+P2666 property_type external-identifier
+P3099 property_type external-identifier
+P3100 property_type external-identifier
+P3101 property_type external-identifier
+P3102 property_type external-identifier
+P3103 property_type item
+P3104 property_type external-identifier
+P3105 property_type external-identifier
+P3106 property_type external-identifier
+P3107 property_type external-identifier
+P3108 property_type external-identifier
+P3109 property_type external-identifier
+P3110 property_type external-identifier
+P3111 property_type external-identifier
+P3112 property_type external-identifier
+P3113 property_type item
+P3114 property_type external-identifier
+P3115 property_type external-identifier
+P3116 property_type external-identifier
+P3117 property_type external-identifier
+P3118 property_type external-identifier
+P3119 property_type external-identifier
+P3120 property_type external-identifier
+P3121 property_type external-identifier
+P3122 property_type external-identifier
+P3123 property_type external-identifier
+P3124 property_type external-identifier
+P3125 property_type external-identifier
+P3126 property_type external-identifier
+P3127 property_type external-identifier
+P3128 property_type external-identifier
+P3129 property_type external-identifier
+P3130 property_type external-identifier
+P3131 property_type external-identifier
+P3132 property_type monolingualtext
+P3133 property_type external-identifier
+P3134 property_type external-identifier
+P3135 property_type external-identifier
+P3136 property_type external-identifier
+P3137 property_type item
+P3138 property_type external-identifier
+P3139 property_type external-identifier
+P3140 property_type external-identifier
+P3141 property_type external-identifier
+P3142 property_type external-identifier
+P3143 property_type external-identifier
+P3144 property_type external-identifier
+P3145 property_type external-identifier
+P3146 property_type external-identifier
+P3147 property_type external-identifier
+P3148 property_type item
+P3149 property_type item
+P3150 property_type item
+P3151 property_type external-identifier
+P3152 property_type external-identifier
+P3153 property_type external-identifier
+P3154 property_type external-identifier
+P3155 property_type external-identifier
+P3156 property_type item
+P3157 property_type quantity
+P3158 property_type item
+P3159 property_type external-identifier
+P3160 property_type external-identifier
+P3161 property_type item
+P3162 property_type external-identifier
+P3163 property_type external-identifier
+P3165 property_type external-identifier
+P3166 property_type external-identifier
+P3167 property_type external-identifier
+P3168 property_type external-identifier
+P3169 property_type external-identifier
+P3170 property_type external-identifier
+P3171 property_type external-identifier
+P3172 property_type external-identifier
+P3173 property_type item
+P3174 property_type item
+P3175 property_type external-identifier
+P3176 property_type string
+P3177 property_type external-identifier
+P3178 property_type external-identifier
+P3179 property_type item
+P3180 property_type external-identifier
+P3181 property_type external-identifier
+P3182 property_type external-identifier
+P3183 property_type external-identifier
+P3184 property_type external-identifier
+P3185 property_type external-identifier
+P3186 property_type external-identifier
+P3187 property_type external-identifier
+P3188 property_type external-identifier
+P3189 property_type item
+P3190 property_type item
+P3191 property_type external-identifier
+P3192 property_type external-identifier
+P3193 property_type external-identifier
+P3194 property_type external-identifier
+P3195 property_type item
+P3196 property_type external-identifier
+P3197 property_type external-identifier
+P3198 property_type external-identifier
+P3199 property_type external-identifier
+P948 property_type string
+P949 property_type external-identifier
+P950 property_type external-identifier
+P951 property_type external-identifier
+P952 property_type string
+P953 property_type url
+P954 property_type external-identifier
+P957 property_type external-identifier
+P958 property_type string
+P959 property_type external-identifier
+P960 property_type external-identifier
+P961 property_type external-identifier
+P962 property_type external-identifier
+P963 property_type url
+P964 property_type external-identifier
+P965 property_type string
+P966 property_type external-identifier
+P967 property_type item
+P968 property_type url
+P969 property_type string
+P970 property_type item
+P971 property_type item
+P972 property_type item
+P973 property_type url
+P974 property_type item
+P980 property_type external-identifier
+P981 property_type external-identifier
+P982 property_type external-identifier
+P984 property_type external-identifier
+P988 property_type external-identifier
+P989 property_type string
+P990 property_type string
+P991 property_type item
+P993 property_type string
+P994 property_type string
+P995 property_type string
+P996 property_type string
+P998 property_type external-identifier
+P999 property_type external-identifier
+P1000 property_type item
+P1001 property_type item
+P1002 property_type item
+P1003 property_type external-identifier
+P1004 property_type external-identifier
+P1005 property_type external-identifier
+P1006 property_type external-identifier
+P1007 property_type external-identifier
+P1010 property_type external-identifier
+P1011 property_type item
+P1012 property_type item
+P1013 property_type item
+P1014 property_type external-identifier
+P1015 property_type external-identifier
+P1016 property_type item
+P1017 property_type external-identifier
+P1018 property_type item
+P1019 property_type url
+P1021 property_type external-identifier
+P1022 property_type external-identifier
+P1023 property_type external-identifier
+P1024 property_type external-identifier
+P1025 property_type external-identifier
+P1026 property_type item
+P1027 property_type item
+P1028 property_type item
+P1029 property_type item
+P1030 property_type string
+P1031 property_type string
+P1032 property_type item
+P1033 property_type item
+P1034 property_type item
+P1035 property_type item
+P1036 property_type string
+P1037 property_type item
+P1038 property_type item
+P1039 property_type item
+P1040 property_type item
+P1041 property_type item
+P1042 property_type external-identifier
+P1043 property_type external-identifier
+P1044 property_type external-identifier
+P1045 property_type external-identifier
+P1046 property_type item
+P1047 property_type external-identifier
+P1048 property_type external-identifier
+P1049 property_type item
+P1050 property_type item
+P1051 property_type external-identifier
+P1052 property_type external-identifier
+P1053 property_type external-identifier
+P1054 property_type external-identifier
+P1055 property_type external-identifier
+P1056 property_type item
+P1057 property_type item
+P1058 property_type external-identifier
+P1059 property_type external-identifier
+P1060 property_type item
+P1064 property_type item
+P1065 property_type url
+P1066 property_type item
+P2667 property_type item
+P2668 property_type item
+P2669 property_type time
+P2670 property_type item
+P2671 property_type external-identifier
+P2672 property_type external-identifier
+P2673 property_type item
+P2674 property_type item
+P2675 property_type item
+P2676 property_type string
+P2677 property_type string
+P2678 property_type external-identifier
+P2679 property_type item
+P2680 property_type item
+P2681 property_type item
+P2682 property_type item
+P2683 property_type external-identifier
+P2684 property_type item
+P2685 property_type external-identifier
+P2686 property_type external-identifier
+P2687 property_type external-identifier
+P2688 property_type external-identifier
+P2689 property_type external-identifier
+P2694 property_type external-identifier
+P2695 property_type item
+P2696 property_type external-identifier
+P2697 property_type external-identifier
+P2698 property_type external-identifier
+P2699 property_type url
+P2700 property_type item
+P2701 property_type item
+P2702 property_type item
+P2703 property_type external-identifier
+P2704 property_type external-identifier
+P2705 property_type external-identifier
+P2708 property_type external-identifier
+P2709 property_type external-identifier
+P2710 property_type quantity
+P2712 property_type quantity
+P2713 property_type string
+P2715 property_type item
+P2716 property_type string
+P2717 property_type quantity
+P2718 property_type quantity
+P2719 property_type string
+P2720 property_type string
+P2721 property_type external-identifier
+P2722 property_type external-identifier
+P2723 property_type external-identifier
+P2724 property_type external-identifier
+P2725 property_type external-identifier
+P2726 property_type external-identifier
+P2727 property_type external-identifier
+P2728 property_type external-identifier
+P2729 property_type external-identifier
+P2730 property_type external-identifier
+P2732 property_type external-identifier
+P2733 property_type external-identifier
+P2734 property_type external-identifier
+P2735 property_type external-identifier
+P2736 property_type external-identifier
+P2737 property_type item
+P2738 property_type item
+P2739 property_type item
+P2740 property_type external-identifier
+P2741 property_type external-identifier
+P2742 property_type external-identifier
+P2743 property_type item
+P2744 property_type string
+P2745 property_type external-identifier
+P2746 property_type item
+P2747 property_type item
+P2748 property_type external-identifier
+P2749 property_type external-identifier
+P2750 property_type external-identifier
+P2751 property_type external-identifier
+P2752 property_type external-identifier
+P2753 property_type external-identifier
+P2754 property_type time
+P2755 property_type external-identifier
+P2756 property_type item
+P2758 property_type item
+P2759 property_type external-identifier
+P2760 property_type external-identifier
+P2761 property_type external-identifier
+P2762 property_type external-identifier
+P2763 property_type external-identifier
+P2764 property_type external-identifier
+P2765 property_type external-identifier
+P2766 property_type external-identifier
+P2767 property_type external-identifier
+P2768 property_type external-identifier
+P2769 property_type quantity
+P2770 property_type item
+P2771 property_type external-identifier
+P2772 property_type external-identifier
+P2773 property_type external-identifier
+P2774 property_type external-identifier
+P2775 property_type external-identifier
+P2776 property_type external-identifier
+P4851 property_type quantity
+P4852 property_type external-identifier
+P4853 property_type external-identifier
+P4854 property_type external-identifier
+P4855 property_type external-identifier
+P4856 property_type string
+P4857 property_type external-identifier
+P4858 property_type external-identifier
+P4859 property_type external-identifier
+P4860 property_type external-identifier
+P4861 property_type external-identifier
+P4862 property_type external-identifier
+P4863 property_type string
+P4864 property_type string
+P4866 property_type external-identifier
+P4867 property_type string
+P4868 property_type external-identifier
+P4869 property_type external-identifier
+P4870 property_type external-identifier
+P4871 property_type external-identifier
+P4872 property_type external-identifier
+P4873 property_type item
+P4875 property_type item
+P4876 property_type quantity
+P4878 property_type item
+P4879 property_type external-identifier
+P4880 property_type external-identifier
+P4881 property_type external-identifier
+P4882 property_type item
+P4883 property_type external-identifier
+P4884 property_type item
+P4885 property_type external-identifier
+P4886 property_type external-identifier
+P4887 property_type external-identifier
+P4888 property_type external-identifier
+P4889 property_type external-identifier
+P4890 property_type external-identifier
+P4891 property_type external-identifier
+P4892 property_type external-identifier
+P4893 property_type external-identifier
+P4894 property_type external-identifier
+P4895 property_type quantity
+P4896 property_type string
+P4897 property_type external-identifier
+P4898 property_type external-identifier
+P4899 property_type external-identifier
+P4900 property_type item
+P4901 property_type external-identifier
+P4902 property_type external-identifier
+P4903 property_type external-identifier
+P4904 property_type external-identifier
+P4905 property_type external-identifier
+P4906 property_type external-identifier
+P4907 property_type external-identifier
+P4908 property_type item
+P4909 property_type quantity
+P4910 property_type external-identifier
+P4911 property_type external-identifier
+P4912 property_type quantity
+P4913 property_type item
+P4914 property_type string
+P4915 property_type item
+P4916 property_type external-identifier
+P4917 property_type external-identifier
+P4919 property_type external-identifier
+P4920 property_type external-identifier
+P4921 property_type external-identifier
+P4922 property_type external-identifier
+P4923 property_type external-identifier
+P4924 property_type external-identifier
+P4925 property_type external-identifier
+P4926 property_type external-identifier
+P4927 property_type external-identifier
+P4928 property_type external-identifier
+P4929 property_type external-identifier
+P4930 property_type external-identifier
+P4931 property_type external-identifier
+P4932 property_type external-identifier
+P4933 property_type external-identifier
+P4934 property_type item
+P4935 property_type external-identifier
+P4936 property_type external-identifier
+P4937 property_type external-identifier
+P4938 property_type external-identifier
+P4939 property_type external-identifier
+P4940 property_type external-identifier
+P4941 property_type external-identifier
+P4942 property_type external-identifier
+P4943 property_type external-identifier
+P4944 property_type external-identifier
+P4945 property_type url
+P4946 property_type external-identifier
+P4947 property_type external-identifier
+P4948 property_type external-identifier
+P4949 property_type external-identifier
+P4950 property_type external-identifier
+P4951 property_type string
+P4952 property_type item
+P4953 property_type external-identifier
+P4954 property_type item
+P4239 property_type monolingualtext
+P4240 property_type item
+P4241 property_type item
+P4242 property_type quantity
+P4243 property_type string
+P4244 property_type external-identifier
+P4245 property_type external-identifier
+P4246 property_type external-identifier
+P4247 property_type external-identifier
+P4248 property_type external-identifier
+P4249 property_type external-identifier
+P4250 property_type quantity
+P4251 property_type external-identifier
+P4252 property_type external-identifier
+P4253 property_type quantity
+P4254 property_type external-identifier
+P4255 property_type external-identifier
+P4256 property_type external-identifier
+P4257 property_type external-identifier
+P4258 property_type external-identifier
+P4259 property_type external-identifier
+P4260 property_type external-identifier
+P4261 property_type external-identifier
+P4262 property_type external-identifier
+P4263 property_type external-identifier
+P4264 property_type external-identifier
+P4265 property_type external-identifier
+P4266 property_type external-identifier
+P4267 property_type external-identifier
+P4268 property_type quantity
+P4269 property_type quantity
+P4270 property_type external-identifier
+P4271 property_type item
+P4272 property_type external-identifier
+P4273 property_type external-identifier
+P4274 property_type external-identifier
+P4275 property_type external-identifier
+P4276 property_type external-identifier
+P4277 property_type external-identifier
+P4278 property_type external-identifier
+P4279 property_type external-identifier
+P4280 property_type external-identifier
+P4281 property_type external-identifier
+P4282 property_type external-identifier
+P4283 property_type external-identifier
+P4284 property_type external-identifier
+P4285 property_type external-identifier
+P4286 property_type external-identifier
+P4287 property_type external-identifier
+P4288 property_type external-identifier
+P4289 property_type external-identifier
+P4290 property_type item
+P4291 property_type string
+P4292 property_type item
+P4293 property_type external-identifier
+P4294 property_type external-identifier
+P4295 property_type quantity
+P4296 property_type quantity
+P4297 property_type external-identifier
+P4298 property_type external-identifier
+P4299 property_type external-identifier
+P4300 property_type external-identifier
+P4301 property_type external-identifier
+P4302 property_type external-identifier
+P4303 property_type external-identifier
+P4304 property_type external-identifier
+P4305 property_type external-identifier
+P4306 property_type external-identifier
+P4307 property_type external-identifier
+P4308 property_type external-identifier
+P4309 property_type external-identifier
+P4310 property_type external-identifier
+P4311 property_type external-identifier
+P4312 property_type item
+P4313 property_type external-identifier
+P4314 property_type external-identifier
+P4315 property_type external-identifier
+P4316 property_type string
+P4317 property_type external-identifier
+P4318 property_type external-identifier
+P4319 property_type external-identifier
+P4320 property_type item
+P4321 property_type item
+P4322 property_type item
+P4323 property_type item
+P4324 property_type item
+P4325 property_type string
+P4326 property_type external-identifier
+P4327 property_type external-identifier
+P4328 property_type external-identifier
+P4329 property_type item
+P4330 property_type item
+P4331 property_type external-identifier
+P4332 property_type external-identifier
+P4333 property_type external-identifier
+P4334 property_type external-identifier
+P4335 property_type external-identifier
+P4336 property_type external-identifier
+P4337 property_type external-identifier
+P4338 property_type external-identifier
+P3200 property_type external-identifier
+P3201 property_type external-identifier
+P3202 property_type external-identifier
+P3203 property_type external-identifier
+P3204 property_type external-identifier
+P3205 property_type item
+P3206 property_type external-identifier
+P3207 property_type external-identifier
+P3208 property_type external-identifier
+P3209 property_type external-identifier
+P3211 property_type external-identifier
+P3212 property_type external-identifier
+P3213 property_type external-identifier
+P3215 property_type external-identifier
+P3216 property_type item
+P3217 property_type external-identifier
+P3218 property_type external-identifier
+P3219 property_type external-identifier
+P3220 property_type external-identifier
+P3221 property_type external-identifier
+P3222 property_type external-identifier
+P3223 property_type external-identifier
+P3224 property_type external-identifier
+P3225 property_type external-identifier
+P3226 property_type external-identifier
+P3227 property_type external-identifier
+P3228 property_type string
+P3229 property_type external-identifier
+P3230 property_type external-identifier
+P3231 property_type external-identifier
+P3232 property_type external-identifier
+P3233 property_type external-identifier
+P3234 property_type external-identifier
+P3235 property_type external-identifier
+P3236 property_type external-identifier
+P3237 property_type external-identifier
+P3238 property_type string
+P3240 property_type external-identifier
+P3241 property_type external-identifier
+P3242 property_type external-identifier
+P3243 property_type external-identifier
+P3245 property_type external-identifier
+P3246 property_type external-identifier
+P3248 property_type external-identifier
+P3250 property_type external-identifier
+P3251 property_type quantity
+P3252 property_type quantity
+P3253 property_type quantity
+P3254 property_type url
+P3256 property_type external-identifier
+P3257 property_type external-identifier
+P3258 property_type external-identifier
+P3259 property_type item
+P3260 property_type quantity
+P3261 property_type item
+P3262 property_type item
+P3263 property_type item
+P3264 property_type item
+P3265 property_type external-identifier
+P3266 property_type external-identifier
+P3267 property_type external-identifier
+P3268 property_type url
+P3269 property_type external-identifier
+P3270 property_type quantity
+P3271 property_type quantity
+P3272 property_type external-identifier
+P3273 property_type external-identifier
+P3274 property_type item
+P3275 property_type item
+P3276 property_type external-identifier
+P3277 property_type external-identifier
+P3279 property_type item
+P3280 property_type external-identifier
+P3281 property_type external-identifier
+P3283 property_type external-identifier
+P3284 property_type external-identifier
+P3285 property_type external-identifier
+P3286 property_type external-identifier
+P3288 property_type external-identifier
+P3289 property_type external-identifier
+P3290 property_type external-identifier
+P3291 property_type external-identifier
+P3292 property_type external-identifier
+P3293 property_type external-identifier
+P3294 property_type item
+P3295 property_type string
+P3296 property_type external-identifier
+P3297 property_type external-identifier
+P3298 property_type external-identifier
+P3299 property_type external-identifier
+P3300 property_type item
+P3301 property_type item
+P3302 property_type external-identifier
+P3303 property_type string
+P3304 property_type external-identifier
+P3305 property_type external-identifier
+P3306 property_type item
+P3307 property_type external-identifier
+P3308 property_type external-identifier
+P3309 property_type external-identifier
+P1067 property_type external-identifier
+P1068 property_type item
+P1069 property_type external-identifier
+P1070 property_type external-identifier
+P1071 property_type item
+P1072 property_type item
+P1073 property_type item
+P1074 property_type item
+P1075 property_type item
+P1076 property_type external-identifier
+P1077 property_type string
+P1078 property_type item
+P1079 property_type item
+P1080 property_type item
+P1081 property_type quantity
+P1082 property_type quantity
+P1083 property_type quantity
+P1084 property_type external-identifier
+P1085 property_type external-identifier
+P1086 property_type quantity
+P1087 property_type quantity
+P1088 property_type quantity
+P1090 property_type quantity
+P1092 property_type quantity
+P1093 property_type quantity
+P1096 property_type quantity
+P1097 property_type quantity
+P1098 property_type quantity
+P1099 property_type quantity
+P1100 property_type quantity
+P1101 property_type quantity
+P1102 property_type quantity
+P1103 property_type quantity
+P1104 property_type quantity
+P1106 property_type quantity
+P1107 property_type quantity
+P1108 property_type quantity
+P1109 property_type quantity
+P1110 property_type quantity
+P1111 property_type quantity
+P1113 property_type quantity
+P1114 property_type quantity
+P1115 property_type external-identifier
+P1116 property_type external-identifier
+P1117 property_type quantity
+P1120 property_type quantity
+P1121 property_type quantity
+P1122 property_type quantity
+P1123 property_type quantity
+P1125 property_type quantity
+P1126 property_type quantity
+P1127 property_type quantity
+P1128 property_type quantity
+P1129 property_type quantity
+P1132 property_type quantity
+P1133 property_type external-identifier
+P1135 property_type item
+P1136 property_type item
+P1137 property_type item
+P1138 property_type external-identifier
+P1139 property_type quantity
+P1140 property_type external-identifier
+P1141 property_type quantity
+P1142 property_type item
+P1143 property_type external-identifier
+P1144 property_type external-identifier
+P1145 property_type item
+P1146 property_type external-identifier
+P1148 property_type quantity
+P1149 property_type string
+P1150 property_type string
+P1151 property_type item
+P1153 property_type external-identifier
+P1154 property_type external-identifier
+P1155 property_type external-identifier
+P1156 property_type external-identifier
+P1157 property_type external-identifier
+P1158 property_type item
+P1159 property_type external-identifier
+P1160 property_type external-identifier
+P1161 property_type string
+P1162 property_type string
+P1163 property_type string
+P1164 property_type quantity
+P1165 property_type item
+P1167 property_type external-identifier
+P1168 property_type external-identifier
+P1170 property_type item
+P1171 property_type item
+P1172 property_type external-identifier
+P1174 property_type quantity
+P1181 property_type quantity
+P1182 property_type external-identifier
+P1183 property_type string
+P1184 property_type external-identifier
+P1185 property_type external-identifier
+P1186 property_type external-identifier
+P1187 property_type external-identifier
+P1188 property_type external-identifier
+P1189 property_type external-identifier
+P5374 property_type external-identifier
+P5375 property_type external-identifier
+P5376 property_type external-identifier
+P5377 property_type external-identifier
+P5378 property_type external-identifier
+P5379 property_type external-identifier
+P5380 property_type external-identifier
+P5381 property_type external-identifier
+P5382 property_type external-identifier
+P5383 property_type external-identifier
+P5384 property_type external-identifier
+P5385 property_type external-identifier
+P5386 property_type item
+P5387 property_type external-identifier
+P5388 property_type external-identifier
+P5389 property_type item
+P5390 property_type external-identifier
+P5391 property_type external-identifier
+P5392 property_type external-identifier
+P5393 property_type external-identifier
+P5394 property_type external-identifier
+P5395 property_type external-identifier
+P5396 property_type external-identifier
+P5397 property_type external-identifier
+P5398 property_type external-identifier
+P5400 property_type external-identifier
+P5401 property_type string
+P5402 property_type string
+P5403 property_type external-identifier
+P5404 property_type external-identifier
+P5406 property_type external-identifier
+P5407 property_type external-identifier
+P5408 property_type external-identifier
+P5409 property_type external-identifier
+P5410 property_type external-identifier
+P5411 property_type external-identifier
+P5412 property_type string
+P5413 property_type external-identifier
+P5414 property_type external-identifier
+P5415 property_type external-identifier
+P5417 property_type external-identifier
+P5418 property_type external-identifier
+P5419 property_type external-identifier
+P5420 property_type external-identifier
+P5421 property_type external-identifier
+P5422 property_type item
+P5423 property_type string
+P5424 property_type external-identifier
+P5425 property_type item
+P5426 property_type item
+P5427 property_type quantity
+P5428 property_type quantity
+P5429 property_type external-identifier
+P5430 property_type external-identifier
+P5431 property_type external-identifier
+P5432 property_type external-identifier
+P5434 property_type external-identifier
+P5435 property_type external-identifier
+P5436 property_type quantity
+P5437 property_type external-identifier
+P5438 property_type item
+P5439 property_type item
+P5440 property_type external-identifier
+P5441 property_type external-identifier
+P5442 property_type external-identifier
+P5443 property_type external-identifier
+P5444 property_type item
+P5445 property_type external-identifier
+P5446 property_type item
+P5447 property_type quantity
+P5448 property_type quantity
+P5449 property_type external-identifier
+P5450 property_type external-identifier
+P5451 property_type external-identifier
+P5452 property_type external-identifier
+P5453 property_type external-identifier
+P5454 property_type external-identifier
+P5455 property_type external-identifier
+P5456 property_type external-identifier
+P5457 property_type external-identifier
+P5458 property_type external-identifier
+P5459 property_type external-identifier
+P5460 property_type item
+P5461 property_type string
+P5462 property_type external-identifier
+P5463 property_type external-identifier
+P5464 property_type external-identifier
+P5465 property_type external-identifier
+P5466 property_type external-identifier
+P5467 property_type external-identifier
+P5468 property_type external-identifier
+P5469 property_type external-identifier
+P5470 property_type external-identifier
+P5471 property_type string
+P5473 property_type external-identifier
+P5474 property_type quantity
+P5475 property_type item
+P5476 property_type external-identifier
+P5477 property_type external-identifier
+P5478 property_type external-identifier
+P4339 property_type external-identifier
+P4340 property_type external-identifier
+P4341 property_type quantity
+P4342 property_type external-identifier
+P4343 property_type external-identifier
+P4344 property_type external-identifier
+P4345 property_type item
+P4346 property_type external-identifier
+P4347 property_type external-identifier
+P4348 property_type external-identifier
+P4349 property_type external-identifier
+P4350 property_type quantity
+P4351 property_type external-identifier
+P4352 property_type external-identifier
+P4353 property_type item
+P4354 property_type string
+P4355 property_type external-identifier
+P4356 property_type external-identifier
+P4357 property_type external-identifier
+P4358 property_type external-identifier
+P4359 property_type external-identifier
+P4360 property_type external-identifier
+P4361 property_type external-identifier
+P4362 property_type external-identifier
+P4363 property_type external-identifier
+P4364 property_type external-identifier
+P4365 property_type external-identifier
+P4366 property_type external-identifier
+P4367 property_type external-identifier
+P4368 property_type external-identifier
+P4369 property_type external-identifier
+P4370 property_type external-identifier
+P4371 property_type external-identifier
+P4372 property_type external-identifier
+P4373 property_type external-identifier
+P4374 property_type external-identifier
+P4375 property_type external-identifier
+P4376 property_type external-identifier
+P4377 property_type external-identifier
+P4379 property_type item
+P4380 property_type external-identifier
+P4381 property_type external-identifier
+P4382 property_type external-identifier
+P4383 property_type external-identifier
+P4384 property_type external-identifier
+P4385 property_type external-identifier
+P4386 property_type external-identifier
+P4387 property_type item
+P4388 property_type external-identifier
+P4389 property_type external-identifier
+P4390 property_type item
+P4391 property_type external-identifier
+P4392 property_type external-identifier
+P4393 property_type external-identifier
+P4394 property_type external-identifier
+P4395 property_type external-identifier
+P4396 property_type external-identifier
+P4397 property_type external-identifier
+P4398 property_type external-identifier
+P4399 property_type external-identifier
+P4400 property_type external-identifier
+P4401 property_type external-identifier
+P4402 property_type external-identifier
+P4403 property_type quantity
+P4404 property_type external-identifier
+P4405 property_type external-identifier
+P4406 property_type external-identifier
+P4407 property_type external-identifier
+P4408 property_type external-identifier
+P4409 property_type external-identifier
+P4410 property_type external-identifier
+P4411 property_type external-identifier
+P4412 property_type external-identifier
+P4413 property_type external-identifier
+P4414 property_type external-identifier
+P4415 property_type external-identifier
+P4416 property_type external-identifier
+P4417 property_type external-identifier
+P4418 property_type external-identifier
+P4419 property_type external-identifier
+P4421 property_type external-identifier
+P4422 property_type external-identifier
+P4423 property_type external-identifier
+P4424 property_type item
+P4425 property_type item
+P4426 property_type item
+P4427 property_type external-identifier
+P4428 property_type item
+P4429 property_type external-identifier
+P4430 property_type external-identifier
+P4431 property_type external-identifier
+P4432 property_type external-identifier
+P4433 property_type external-identifier
+P4434 property_type external-identifier
+P4435 property_type external-identifier
+P4436 property_type external-identifier
+P4437 property_type item
+P4438 property_type external-identifier
+P4439 property_type external-identifier
+P4440 property_type external-identifier
+P4955 property_type external-identifier
+P4956 property_type external-identifier
+P4957 property_type external-identifier
+P4958 property_type item
+P4959 property_type external-identifier
+P4960 property_type external-identifier
+P4961 property_type external-identifier
+P4962 property_type external-identifier
+P4963 property_type external-identifier
+P4964 property_type external-identifier
+P4965 property_type external-identifier
+P4966 property_type external-identifier
+P4967 property_type item
+P4968 property_type item
+P4969 property_type item
+P4970 property_type string
+P4971 property_type external-identifier
+P4972 property_type external-identifier
+P4973 property_type external-identifier
+P4974 property_type external-identifier
+P4975 property_type external-identifier
+P4976 property_type external-identifier
+P4977 property_type external-identifier
+P4978 property_type external-identifier
+P4979 property_type external-identifier
+P4980 property_type external-identifier
+P4981 property_type external-identifier
+P4982 property_type external-identifier
+P4983 property_type external-identifier
+P4985 property_type external-identifier
+P4986 property_type external-identifier
+P4987 property_type external-identifier
+P4988 property_type item
+P4989 property_type external-identifier
+P4991 property_type external-identifier
+P4992 property_type external-identifier
+P4993 property_type external-identifier
+P4994 property_type external-identifier
+P4996 property_type external-identifier
+P4997 property_type url
+P4998 property_type string
+P4999 property_type quantity
+P5001 property_type external-identifier
+P5002 property_type external-identifier
+P5003 property_type external-identifier
+P5004 property_type item
+P5005 property_type external-identifier
+P5006 property_type external-identifier
+P5007 property_type external-identifier
+P5008 property_type item
+P5009 property_type item
+P5010 property_type external-identifier
+P5011 property_type external-identifier
+P5012 property_type item
+P5013 property_type external-identifier
+P5014 property_type external-identifier
+P5015 property_type external-identifier
+P5016 property_type external-identifier
+P5017 property_type time
+P5018 property_type external-identifier
+P5019 property_type external-identifier
+P5020 property_type external-identifier
+P5021 property_type item
+P5022 property_type quantity
+P5023 property_type item
+P5024 property_type item
+P5025 property_type item
+P5026 property_type item
+P5027 property_type quantity
+P5028 property_type item
+P5029 property_type external-identifier
+P5030 property_type item
+P5031 property_type external-identifier
+P5032 property_type external-identifier
+P5033 property_type external-identifier
+P5034 property_type external-identifier
+P5035 property_type external-identifier
+P5036 property_type external-identifier
+P5037 property_type external-identifier
+P5038 property_type external-identifier
+P5039 property_type external-identifier
+P5040 property_type item
+P5041 property_type item
+P5042 property_type item
property_type item +P5043 property_type quantity +P5044 property_type quantity +P5045 property_type quantity +P5046 property_type string +P5047 property_type external-identifier +P5048 property_type external-identifier +P5049 property_type external-identifier +P5050 property_type external-identifier +P5051 property_type item +P5052 property_type item +P5053 property_type item +P5054 property_type item +P5055 property_type external-identifier +P5056 property_type item +P5057 property_type external-identifier +P5058 property_type external-identifier +P3310 property_type item +P3311 property_type string +P3314 property_type external-identifier +P3315 property_type external-identifier +P3316 property_type external-identifier +P3318 property_type external-identifier +P3320 property_type item +P3321 property_type monolingualtext +P3322 property_type external-identifier +P3323 property_type item +P3324 property_type external-identifier +P3325 property_type external-identifier +P3326 property_type external-identifier +P3327 property_type external-identifier +P3328 property_type external-identifier +P3329 property_type external-identifier +P3330 property_type external-identifier +P3331 property_type external-identifier +P3332 property_type external-identifier +P3333 property_type external-identifier +P3335 property_type item +P3337 property_type quantity +P3338 property_type external-identifier +P3339 property_type external-identifier +P3340 property_type external-identifier +P3341 property_type external-identifier +P3342 property_type item +P3343 property_type external-identifier +P3344 property_type external-identifier +P3345 property_type external-identifier +P3346 property_type external-identifier +P3347 property_type external-identifier +P3348 property_type external-identifier +P3349 property_type item +P3350 property_type external-identifier +P3351 property_type external-identifier +P3352 property_type external-identifier +P3353 property_type external-identifier +P3354 property_type item +P3355 property_type item +P3356 property_type item +P3357 property_type item +P3358 property_type item +P3359 property_type item +P3360 property_type external-identifier +P3361 property_type external-identifier +P3362 property_type quantity +P3363 property_type external-identifier +P3364 property_type item +P3365 property_type external-identifier +P3366 property_type external-identifier +P3367 property_type external-identifier +P3368 property_type external-identifier +P3370 property_type external-identifier +P3371 property_type external-identifier +P3372 property_type external-identifier +P3373 property_type item +P3374 property_type item +P3375 property_type external-identifier +P3376 property_type external-identifier +P3377 property_type external-identifier +P3378 property_type external-identifier +P3379 property_type external-identifier +P3380 property_type external-identifier +P3381 property_type external-identifier +P3382 property_type external-identifier +P3383 property_type string +P3385 property_type external-identifier +P3386 property_type external-identifier +P3387 property_type quantity +P3388 property_type external-identifier +P3389 property_type external-identifier +P3390 property_type external-identifier +P3391 property_type external-identifier +P3392 property_type external-identifier +P3393 property_type external-identifier +P3394 property_type external-identifier +P3395 property_type quantity +P3396 property_type external-identifier +P3397 property_type external-identifier +P3398 
property_type external-identifier +P3399 property_type external-identifier +P3400 property_type external-identifier +P3401 property_type external-identifier +P3402 property_type item +P3403 property_type item +P3404 property_type external-identifier +P3405 property_type external-identifier +P3406 property_type external-identifier +P3407 property_type external-identifier +P3408 property_type external-identifier +P3409 property_type external-identifier +P3410 property_type external-identifier +P3411 property_type external-identifier +P3412 property_type external-identifier +P3413 property_type external-identifier +P3414 property_type external-identifier +P3415 property_type item +P3416 property_type item +P3417 property_type external-identifier +P1190 property_type string +P1191 property_type time +P1192 property_type item +P1193 property_type quantity +P1194 property_type item +P1195 property_type string +P1196 property_type item +P1198 property_type quantity +P1199 property_type item +P1200 property_type item +P1201 property_type item +P1202 property_type item +P1203 property_type external-identifier +P1204 property_type item +P1207 property_type external-identifier +P1208 property_type external-identifier +P1209 property_type external-identifier +P1210 property_type item +P1211 property_type item +P1212 property_type external-identifier +P1213 property_type external-identifier +P1214 property_type external-identifier +P1215 property_type quantity +P1216 property_type external-identifier +P1217 property_type external-identifier +P1218 property_type external-identifier +P1219 property_type external-identifier +P1220 property_type external-identifier +P1221 property_type item +P1225 property_type external-identifier +P1227 property_type item +P1229 property_type external-identifier +P1230 property_type external-identifier +P1232 property_type external-identifier +P1233 property_type external-identifier +P1234 property_type external-identifier +P1235 property_type external-identifier +P1236 property_type external-identifier +P1237 property_type external-identifier +P1238 property_type external-identifier +P1239 property_type external-identifier +P1240 property_type string +P1241 property_type external-identifier +P1242 property_type external-identifier +P1243 property_type external-identifier +P1245 property_type external-identifier +P1246 property_type external-identifier +P1247 property_type quantity +P1248 property_type external-identifier +P1249 property_type time +P1250 property_type external-identifier +P1251 property_type external-identifier +P1252 property_type external-identifier +P1253 property_type external-identifier +P1254 property_type external-identifier +P1255 property_type external-identifier +P1256 property_type external-identifier +P1257 property_type string +P1258 property_type external-identifier +P1259 property_type globe-coordinate +P1260 property_type external-identifier +P1261 property_type external-identifier +P1262 property_type external-identifier +P1263 property_type external-identifier +P1264 property_type item +P1265 property_type external-identifier +P1266 property_type external-identifier +P1267 property_type external-identifier +P1268 property_type item +P1269 property_type item +P1270 property_type external-identifier +P1271 property_type external-identifier +P1272 property_type external-identifier +P1273 property_type external-identifier +P1274 property_type external-identifier +P1275 property_type external-identifier +P1276 property_type 
external-identifier +P1277 property_type external-identifier +P1278 property_type external-identifier +P1279 property_type quantity +P1280 property_type external-identifier +P1281 property_type external-identifier +P1282 property_type string +P1283 property_type item +P1284 property_type external-identifier +P1285 property_type external-identifier +P1286 property_type external-identifier +P1287 property_type external-identifier +P1288 property_type external-identifier +P1289 property_type external-identifier +P1290 property_type item +P1291 property_type external-identifier +P1292 property_type external-identifier +P1293 property_type external-identifier +P1294 property_type external-identifier +P1295 property_type quantity +P1296 property_type external-identifier +P1297 property_type external-identifier +P1299 property_type item +P1300 property_type external-identifier +P4441 property_type quantity +P4442 property_type quantity +P4443 property_type item +P4444 property_type item +P4445 property_type quantity +P4446 property_type item +P4447 property_type quantity +P4448 property_type quantity +P4449 property_type external-identifier +P4450 property_type external-identifier +P4451 property_type external-identifier +P4452 property_type item +P4453 property_type external-identifier +P4454 property_type external-identifier +P4455 property_type external-identifier +P4456 property_type external-identifier +P4457 property_type external-identifier +P4458 property_type external-identifier +P4459 property_type external-identifier +P4460 property_type external-identifier +P4461 property_type external-identifier +P4462 property_type external-identifier +P4463 property_type external-identifier +P4464 property_type external-identifier +P4465 property_type external-identifier +P4466 property_type external-identifier +P4467 property_type external-identifier +P4468 property_type external-identifier +P4469 property_type external-identifier +P4470 property_type external-identifier +P4471 property_type external-identifier +P4472 property_type external-identifier +P4473 property_type external-identifier +P4474 property_type external-identifier +P4475 property_type external-identifier +P4476 property_type external-identifier +P4477 property_type external-identifier +P4478 property_type external-identifier +P4479 property_type external-identifier +P4480 property_type external-identifier +P4481 property_type external-identifier +P4482 property_type external-identifier +P4483 property_type external-identifier +P4484 property_type external-identifier +P4485 property_type external-identifier +P4486 property_type external-identifier +P4487 property_type external-identifier +P4488 property_type external-identifier +P4489 property_type external-identifier +P4490 property_type external-identifier +P4491 property_type external-identifier +P4493 property_type external-identifier +P4494 property_type external-identifier +P4495 property_type external-identifier +P4496 property_type string +P4498 property_type external-identifier +P4500 property_type quantity +P4501 property_type quantity +P4502 property_type external-identifier +P4503 property_type external-identifier +P4504 property_type external-identifier +P4505 property_type external-identifier +P4506 property_type string +P4507 property_type external-identifier +P4508 property_type external-identifier +P4509 property_type external-identifier +P4510 property_type item +P4511 property_type quantity +P4512 property_type external-identifier +P4513 property_type 
external-identifier +P4514 property_type external-identifier +P4515 property_type external-identifier +P4516 property_type external-identifier +P4517 property_type external-identifier +P4518 property_type external-identifier +P4519 property_type quantity +P4520 property_type external-identifier +P4521 property_type external-identifier +P4522 property_type external-identifier +P4523 property_type external-identifier +P4524 property_type external-identifier +P4525 property_type external-identifier +P4526 property_type external-identifier +P4527 property_type external-identifier +P4528 property_type external-identifier +P4529 property_type external-identifier +P4530 property_type external-identifier +P4531 property_type external-identifier +P4532 property_type external-identifier +P4533 property_type external-identifier +P4534 property_type external-identifier +P4535 property_type external-identifier +P4536 property_type external-identifier +P4537 property_type external-identifier +P4538 property_type external-identifier +P4539 property_type external-identifier +P4540 property_type external-identifier +P4541 property_type external-identifier +P4542 property_type external-identifier +P4543 property_type item +P5479 property_type quantity +P5480 property_type quantity +P5481 property_type quantity +P5483 property_type quantity +P5485 property_type external-identifier +P5488 property_type external-identifier +P5489 property_type external-identifier +P5490 property_type external-identifier +P5491 property_type external-identifier +P5492 property_type external-identifier +P5493 property_type external-identifier +P5494 property_type external-identifier +P5495 property_type external-identifier +P5496 property_type external-identifier +P5497 property_type external-identifier +P5498 property_type external-identifier +P5499 property_type external-identifier +P5500 property_type external-identifier +P5501 property_type external-identifier +P5502 property_type external-identifier +P5503 property_type external-identifier +P5504 property_type external-identifier +P5505 property_type external-identifier +P5506 property_type external-identifier +P5507 property_type external-identifier +P5508 property_type external-identifier +P5509 property_type external-identifier +P5510 property_type external-identifier +P5513 property_type external-identifier +P5514 property_type item +P5515 property_type external-identifier +P5516 property_type external-identifier +P5517 property_type external-identifier +P5518 property_type string +P5519 property_type string +P5520 property_type quantity +P5522 property_type item +P5523 property_type item +P5524 property_type quantity +P5525 property_type external-identifier +P5526 property_type quantity +P5527 property_type external-identifier +P5528 property_type external-identifier +P5529 property_type quantity +P5530 property_type external-identifier +P5531 property_type external-identifier +P5532 property_type external-identifier +P5533 property_type external-identifier +P5534 property_type external-identifier +P5535 property_type external-identifier +P5536 property_type external-identifier +P5537 property_type item +P5538 property_type external-identifier +P5539 property_type external-identifier +P5540 property_type external-identifier +P5541 property_type external-identifier +P5542 property_type external-identifier +P5543 property_type external-identifier +P5544 property_type external-identifier +P5545 property_type external-identifier +P5546 property_type external-identifier 
+P5547 property_type external-identifier +P5548 property_type string +P5549 property_type external-identifier +P5550 property_type external-identifier +P5551 property_type external-identifier +P5552 property_type external-identifier +P5553 property_type external-identifier +P5554 property_type external-identifier +P5555 property_type string +P5556 property_type external-identifier +P5557 property_type external-identifier +P5558 property_type external-identifier +P5559 property_type external-identifier +P5560 property_type item +P5561 property_type external-identifier +P5562 property_type external-identifier +P5563 property_type external-identifier +P5564 property_type item +P5565 property_type external-identifier +P5566 property_type external-identifier +P5567 property_type external-identifier +P5568 property_type external-identifier +P5569 property_type external-identifier +P5570 property_type external-identifier +P5571 property_type external-identifier +P5572 property_type item +P5573 property_type external-identifier +P5574 property_type external-identifier +P5575 property_type quantity +P5576 property_type external-identifier +P5578 property_type external-identifier +P5579 property_type external-identifier +P5580 property_type external-identifier +P5581 property_type external-identifier +P5582 property_type quantity +P5584 property_type external-identifier +P5585 property_type external-identifier +P5586 property_type external-identifier +P5587 property_type external-identifier +P1301 property_type quantity +P1302 property_type item +P1303 property_type item +P1304 property_type item +P1305 property_type external-identifier +P1307 property_type external-identifier +P1308 property_type item +P1309 property_type external-identifier +P1310 property_type item +P1311 property_type external-identifier +P1312 property_type item +P1313 property_type item +P1314 property_type quantity +P1315 property_type external-identifier +P1316 property_type external-identifier +P1317 property_type time +P1318 property_type item +P1319 property_type time +P1320 property_type external-identifier +P1321 property_type item +P1322 property_type item +P1323 property_type external-identifier +P1324 property_type url +P1325 property_type url +P1326 property_type time +P1327 property_type item +P1329 property_type string +P1330 property_type external-identifier +P1331 property_type external-identifier +P1332 property_type globe-coordinate +P1333 property_type globe-coordinate +P1334 property_type globe-coordinate +P1335 property_type globe-coordinate +P1336 property_type item +P1338 property_type external-identifier +P1339 property_type quantity +P1340 property_type item +P1341 property_type external-identifier +P1342 property_type quantity +P1343 property_type item +P1344 property_type item +P1345 property_type quantity +P1346 property_type item +P1347 property_type item +P1348 property_type url +P1349 property_type item +P1350 property_type quantity +P1351 property_type quantity +P1352 property_type quantity +P1353 property_type string +P1354 property_type item +P1355 property_type quantity +P1356 property_type quantity +P1357 property_type quantity +P1358 property_type quantity +P1359 property_type quantity +P1360 property_type string +P1362 property_type external-identifier +P1363 property_type item +P1364 property_type external-identifier +P1365 property_type item +P1366 property_type item +P1367 property_type external-identifier +P1368 property_type external-identifier +P1369 property_type external-identifier 
+P1370 property_type external-identifier +P1371 property_type external-identifier +P1372 property_type item +P1373 property_type quantity +P1375 property_type external-identifier +P1376 property_type item +P1377 property_type external-identifier +P1378 property_type external-identifier +P1380 property_type external-identifier +P1381 property_type external-identifier +P1382 property_type item +P1383 property_type item +P1385 property_type external-identifier +P1386 property_type external-identifier +P1387 property_type item +P1388 property_type external-identifier +P1389 property_type item +P1390 property_type quantity +P1391 property_type external-identifier +P1392 property_type external-identifier +P1393 property_type item +P1394 property_type external-identifier +P1395 property_type external-identifier +P1396 property_type string +P1397 property_type external-identifier +P1398 property_type item +P1399 property_type item +P1400 property_type external-identifier +P1401 property_type url +P1402 property_type string +P1403 property_type item +P1404 property_type external-identifier +P1406 property_type item +P1407 property_type external-identifier +P1408 property_type item +P4544 property_type external-identifier +P4545 property_type item +P4546 property_type external-identifier +P4547 property_type external-identifier +P4548 property_type external-identifier +P4549 property_type external-identifier +P4550 property_type external-identifier +P4551 property_type external-identifier +P4552 property_type item +P4553 property_type external-identifier +P4554 property_type external-identifier +P4555 property_type external-identifier +P4556 property_type external-identifier +P4557 property_type external-identifier +P4558 property_type external-identifier +P4559 property_type external-identifier +P4560 property_type external-identifier +P4561 property_type external-identifier +P4562 property_type external-identifier +P4563 property_type external-identifier +P4564 property_type external-identifier +P4565 property_type string +P4566 property_type time +P4567 property_type external-identifier +P4568 property_type external-identifier +P4569 property_type external-identifier +P4571 property_type external-identifier +P4572 property_type external-identifier +P4573 property_type string +P4574 property_type external-identifier +P4575 property_type string +P4576 property_type external-identifier +P4577 property_type external-identifier +P4578 property_type external-identifier +P4579 property_type external-identifier +P4580 property_type external-identifier +P4581 property_type external-identifier +P4582 property_type external-identifier +P4583 property_type external-identifier +P4584 property_type item +P4585 property_type external-identifier +P4586 property_type item +P4587 property_type external-identifier +P4588 property_type external-identifier +P4589 property_type external-identifier +P4590 property_type external-identifier +P4591 property_type external-identifier +P4592 property_type external-identifier +P4593 property_type external-identifier +P4594 property_type external-identifier +P4595 property_type string +P4596 property_type external-identifier +P4597 property_type external-identifier +P4598 property_type external-identifier +P4599 property_type item +P4600 property_type item +P4601 property_type external-identifier +P4602 property_type time +P4604 property_type external-identifier +P4605 property_type external-identifier +P4606 property_type external-identifier +P4607 property_type 
external-identifier +P4608 property_type item +P4609 property_type external-identifier +P4610 property_type external-identifier +P4611 property_type external-identifier +P4612 property_type external-identifier +P4613 property_type external-identifier +P4614 property_type item +P4615 property_type external-identifier +P4616 property_type external-identifier +P4617 property_type external-identifier +P4618 property_type external-identifier +P4619 property_type external-identifier +P4620 property_type external-identifier +P4621 property_type external-identifier +P4622 property_type item +P4623 property_type external-identifier +P4624 property_type item +P4625 property_type external-identifier +P4626 property_type item +P4627 property_type external-identifier +P4628 property_type item +P4629 property_type external-identifier +P4630 property_type external-identifier +P4631 property_type external-identifier +P4632 property_type external-identifier +P4633 property_type string +P4634 property_type item +P4635 property_type external-identifier +P4636 property_type external-identifier +P4637 property_type external-identifier +P4638 property_type external-identifier +P4639 property_type external-identifier +P4640 property_type string +P4641 property_type external-identifier +P4642 property_type external-identifier +P4643 property_type external-identifier +P4644 property_type external-identifier +P4645 property_type external-identifier +P3418 property_type external-identifier +P3419 property_type external-identifier +P3420 property_type external-identifier +P3421 property_type external-identifier +P3422 property_type external-identifier +P3423 property_type external-identifier +P3424 property_type external-identifier +P3425 property_type external-identifier +P3426 property_type external-identifier +P3427 property_type external-identifier +P3428 property_type item +P3429 property_type external-identifier +P3430 property_type external-identifier +P3431 property_type external-identifier +P3432 property_type item +P3433 property_type item +P3434 property_type external-identifier +P3435 property_type external-identifier +P3436 property_type external-identifier +P3437 property_type item +P3438 property_type item +P3439 property_type quantity +P3440 property_type item +P3441 property_type string +P3442 property_type external-identifier +P3443 property_type external-identifier +P3444 property_type external-identifier +P3445 property_type external-identifier +P3446 property_type external-identifier +P3447 property_type item +P3448 property_type item +P3449 property_type external-identifier +P3450 property_type item +P3451 property_type string +P3452 property_type item +P3453 property_type external-identifier +P3454 property_type external-identifier +P3455 property_type external-identifier +P3456 property_type external-identifier +P3457 property_type quantity +P3458 property_type external-identifier +P3459 property_type external-identifier +P3460 property_type item +P3461 property_type item +P3462 property_type external-identifier +P3463 property_type external-identifier +P3464 property_type item +P3465 property_type quantity +P3466 property_type external-identifier +P3467 property_type external-identifier +P3468 property_type external-identifier +P3469 property_type external-identifier +P3470 property_type external-identifier +P3471 property_type external-identifier +P3472 property_type external-identifier +P3473 property_type external-identifier +P3475 property_type external-identifier +P3476 property_type 
external-identifier +P3477 property_type external-identifier +P3478 property_type external-identifier +P3479 property_type external-identifier +P3480 property_type external-identifier +P3481 property_type external-identifier +P3482 property_type external-identifier +P3483 property_type external-identifier +P3485 property_type quantity +P3486 property_type quantity +P3487 property_type quantity +P3488 property_type quantity +P3489 property_type item +P3490 property_type item +P3491 property_type item +P3492 property_type quantity +P3493 property_type item +P3494 property_type item +P3495 property_type external-identifier +P3496 property_type item +P3497 property_type item +P3498 property_type external-identifier +P3499 property_type external-identifier +P3500 property_type external-identifier +P3501 property_type item +P3502 property_type external-identifier +P3503 property_type external-identifier +P3504 property_type external-identifier +P3505 property_type external-identifier +P3506 property_type external-identifier +P3507 property_type external-identifier +P3509 property_type external-identifier +P3511 property_type external-identifier +P3512 property_type item +P3513 property_type external-identifier +P3514 property_type external-identifier +P3515 property_type external-identifier +P3516 property_type external-identifier +P3517 property_type external-identifier +P3518 property_type external-identifier +P3519 property_type external-identifier +P3520 property_type external-identifier +P3521 property_type external-identifier +P5059 property_type item +P5061 property_type monolingualtext +P5062 property_type external-identifier +P5063 property_type external-identifier +P5064 property_type external-identifier +P5065 property_type quantity +P5066 property_type quantity +P5067 property_type quantity +P5068 property_type external-identifier +P5069 property_type quantity +P5070 property_type item +P5071 property_type quantity +P5072 property_type item +P5073 property_type external-identifier +P5075 property_type external-identifier +P5076 property_type external-identifier +P5077 property_type external-identifier +P5078 property_type external-identifier +P5079 property_type external-identifier +P5080 property_type external-identifier +P5081 property_type external-identifier +P5082 property_type external-identifier +P5083 property_type external-identifier +P5084 property_type external-identifier +P5085 property_type external-identifier +P5086 property_type external-identifier +P5087 property_type external-identifier +P5088 property_type external-identifier +P5090 property_type external-identifier +P5091 property_type external-identifier +P5092 property_type external-identifier +P5093 property_type external-identifier +P5094 property_type external-identifier +P5095 property_type item +P5096 property_type item +P5097 property_type external-identifier +P5098 property_type external-identifier +P5099 property_type external-identifier +P5101 property_type external-identifier +P5102 property_type item +P5103 property_type external-identifier +P5104 property_type external-identifier +P5105 property_type item +P5106 property_type external-identifier +P5107 property_type external-identifier +P5108 property_type external-identifier +P5109 property_type item +P5110 property_type item +P5114 property_type external-identifier +P5115 property_type external-identifier +P5116 property_type external-identifier +P5117 property_type external-identifier +P5118 property_type external-identifier +P5119 property_type 
external-identifier +P5120 property_type external-identifier +P5121 property_type external-identifier +P5122 property_type external-identifier +P5123 property_type external-identifier +P5124 property_type external-identifier +P5125 property_type item +P5126 property_type item +P5127 property_type external-identifier +P5128 property_type external-identifier +P5129 property_type external-identifier +P5131 property_type item +P5132 property_type item +P5133 property_type item +P5134 property_type item +P5135 property_type item +P5136 property_type item +P5137 property_type item +P5138 property_type item +P5139 property_type string +P5140 property_type globe-coordinate +P5141 property_type quantity +P5142 property_type external-identifier +P5143 property_type external-identifier +P5144 property_type external-identifier +P5145 property_type external-identifier +P5146 property_type external-identifier +P5147 property_type external-identifier +P5148 property_type external-identifier +P5149 property_type external-identifier +P5150 property_type item +P5151 property_type external-identifier +P5152 property_type item +P5153 property_type external-identifier +P5154 property_type external-identifier +P5156 property_type external-identifier +P5157 property_type external-identifier +P5158 property_type external-identifier +P5159 property_type external-identifier +P5160 property_type external-identifier +P5161 property_type external-identifier +P5162 property_type external-identifier +P5163 property_type external-identifier +P5164 property_type external-identifier +P5165 property_type external-identifier +P5166 property_type item +P5167 property_type quantity +P5588 property_type item +P5589 property_type item +P5590 property_type external-identifier +P5591 property_type item +P5592 property_type quantity +P5593 property_type quantity +P5594 property_type quantity +P5595 property_type quantity +P5596 property_type quantity +P5597 property_type external-identifier +P5598 property_type external-identifier +P5599 property_type external-identifier +P5600 property_type external-identifier +P5601 property_type external-identifier +P5602 property_type external-identifier +P5603 property_type external-identifier +P5604 property_type external-identifier +P5605 property_type external-identifier +P5606 property_type item +P5607 property_type item +P5608 property_type quantity +P5609 property_type external-identifier +P5610 property_type external-identifier +P5611 property_type external-identifier +P5612 property_type external-identifier +P5613 property_type external-identifier +P5614 property_type external-identifier +P5615 property_type external-identifier +P5616 property_type external-identifier +P5617 property_type external-identifier +P5618 property_type external-identifier +P5619 property_type external-identifier +P5620 property_type external-identifier +P5621 property_type external-identifier +P5622 property_type external-identifier +P5623 property_type item +P5624 property_type quantity +P5625 property_type string +P5626 property_type external-identifier +P5627 property_type external-identifier +P5628 property_type external-identifier +P5629 property_type external-identifier +P5630 property_type quantity +P5631 property_type external-identifier +P5632 property_type external-identifier +P5633 property_type external-identifier +P5634 property_type external-identifier +P5635 property_type external-identifier +P5636 property_type external-identifier +P5637 property_type external-identifier +P5638 property_type 
external-identifier +P5639 property_type external-identifier +P5640 property_type external-identifier +P5641 property_type external-identifier +P5642 property_type item +P5643 property_type external-identifier +P5644 property_type external-identifier +P5645 property_type external-identifier +P5646 property_type external-identifier +P5647 property_type external-identifier +P5648 property_type external-identifier +P5649 property_type external-identifier +P5650 property_type external-identifier +P5651 property_type external-identifier +P5652 property_type external-identifier +P5653 property_type external-identifier +P5654 property_type external-identifier +P5655 property_type external-identifier +P5656 property_type external-identifier +P5657 property_type external-identifier +P5658 property_type item +P5659 property_type external-identifier +P5661 property_type external-identifier +P5662 property_type external-identifier +P5663 property_type external-identifier +P5664 property_type external-identifier +P5665 property_type external-identifier +P5666 property_type external-identifier +P5667 property_type external-identifier +P5668 property_type quantity +P5669 property_type quantity +P5670 property_type quantity +P5672 property_type quantity +P5673 property_type quantity +P5674 property_type quantity +P5675 property_type quantity +P5676 property_type quantity +P5677 property_type quantity +P5678 property_type quantity +P5679 property_type quantity +P5680 property_type external-identifier +P5681 property_type quantity +P5682 property_type quantity +P5683 property_type external-identifier +P5685 property_type quantity +P5686 property_type external-identifier +P5687 property_type external-identifier +P5688 property_type external-identifier +P5690 property_type external-identifier +P5691 property_type external-identifier +P4646 property_type item +P4647 property_type item +P4649 property_type item +P4650 property_type external-identifier +P4651 property_type external-identifier +P4652 property_type external-identifier +P4653 property_type item +P4654 property_type string +P4655 property_type external-identifier +P4656 property_type url +P4657 property_type external-identifier +P4658 property_type external-identifier +P4659 property_type external-identifier +P4660 property_type external-identifier +P4661 property_type item +P4662 property_type external-identifier +P4663 property_type external-identifier +P4664 property_type external-identifier +P4665 property_type external-identifier +P4666 property_type external-identifier +P4667 property_type external-identifier +P4668 property_type external-identifier +P4669 property_type string +P4670 property_type external-identifier +P4671 property_type external-identifier +P4672 property_type external-identifier +P4673 property_type external-identifier +P4674 property_type external-identifier +P4675 property_type item +P4676 property_type external-identifier +P4677 property_type external-identifier +P4678 property_type external-identifier +P4679 property_type external-identifier +P4680 property_type item +P4681 property_type external-identifier +P4682 property_type external-identifier +P4683 property_type external-identifier +P4684 property_type external-identifier +P4685 property_type external-identifier +P4686 property_type external-identifier +P4687 property_type external-identifier +P4688 property_type item +P4689 property_type external-identifier +P4690 property_type external-identifier +P4691 property_type external-identifier +P4692 property_type 
external-identifier +P4693 property_type external-identifier +P4694 property_type external-identifier +P4695 property_type external-identifier +P4696 property_type external-identifier +P4697 property_type external-identifier +P4698 property_type external-identifier +P4699 property_type external-identifier +P4700 property_type external-identifier +P4701 property_type external-identifier +P4702 property_type external-identifier +P4703 property_type external-identifier +P4704 property_type external-identifier +P4705 property_type external-identifier +P4706 property_type external-identifier +P4707 property_type external-identifier +P4708 property_type external-identifier +P4709 property_type external-identifier +P4710 property_type external-identifier +P4711 property_type external-identifier +P4712 property_type external-identifier +P4713 property_type external-identifier +P4714 property_type quantity +P4715 property_type external-identifier +P4716 property_type external-identifier +P4717 property_type external-identifier +P4718 property_type external-identifier +P4720 property_type external-identifier +P4721 property_type external-identifier +P4722 property_type external-identifier +P4723 property_type external-identifier +P4724 property_type external-identifier +P4725 property_type external-identifier +P4726 property_type external-identifier +P4727 property_type external-identifier +P4728 property_type external-identifier +P4729 property_type external-identifier +P4730 property_type external-identifier +P4731 property_type external-identifier +P4732 property_type external-identifier +P4733 property_type item +P4734 property_type external-identifier +P4735 property_type external-identifier +P4736 property_type external-identifier +P4737 property_type external-identifier +P4738 property_type external-identifier +P4739 property_type external-identifier +P4740 property_type external-identifier +P4741 property_type external-identifier +P4742 property_type external-identifier +P4743 property_type item +P4744 property_type external-identifier +P4745 property_type item +P4746 property_type external-identifier +P4747 property_type external-identifier +P3522 property_type external-identifier +P3523 property_type external-identifier +P3524 property_type external-identifier +P3525 property_type external-identifier +P3526 property_type external-identifier +P3527 property_type external-identifier +P3528 property_type external-identifier +P3529 property_type quantity +P3530 property_type quantity +P3531 property_type external-identifier +P3532 property_type external-identifier +P3533 property_type external-identifier +P3534 property_type external-identifier +P3535 property_type external-identifier +P3536 property_type external-identifier +P3537 property_type external-identifier +P3538 property_type external-identifier +P3539 property_type external-identifier +P3541 property_type external-identifier +P3542 property_type external-identifier +P3544 property_type external-identifier +P3545 property_type external-identifier +P3546 property_type external-identifier +P3547 property_type external-identifier +P3548 property_type external-identifier +P3549 property_type external-identifier +P3550 property_type external-identifier +P3551 property_type external-identifier +P3552 property_type external-identifier +P3553 property_type external-identifier +P3554 property_type external-identifier +P3555 property_type external-identifier +P3556 property_type external-identifier +P3557 property_type external-identifier 
+P3558 property_type external-identifier +P3559 property_type quantity +P3560 property_type external-identifier +P3561 property_type external-identifier +P3562 property_type external-identifier +P3563 property_type external-identifier +P3564 property_type external-identifier +P3565 property_type external-identifier +P3566 property_type external-identifier +P3567 property_type external-identifier +P3568 property_type external-identifier +P3569 property_type external-identifier +P3570 property_type external-identifier +P3571 property_type external-identifier +P3572 property_type external-identifier +P3573 property_type external-identifier +P3574 property_type external-identifier +P3575 property_type quantity +P3576 property_type external-identifier +P3577 property_type external-identifier +P3578 property_type item +P3579 property_type external-identifier +P3580 property_type external-identifier +P3581 property_type external-identifier +P3582 property_type external-identifier +P3583 property_type external-identifier +P3584 property_type external-identifier +P3585 property_type external-identifier +P3586 property_type external-identifier +P3587 property_type external-identifier +P3588 property_type external-identifier +P3589 property_type external-identifier +P3590 property_type external-identifier +P3591 property_type external-identifier +P3592 property_type item +P3593 property_type external-identifier +P3594 property_type external-identifier +P3595 property_type external-identifier +P3596 property_type external-identifier +P3597 property_type external-identifier +P3598 property_type external-identifier +P3599 property_type external-identifier +P3600 property_type external-identifier +P3601 property_type external-identifier +P3602 property_type item +P3603 property_type external-identifier +P3604 property_type external-identifier +P3605 property_type external-identifier +P3606 property_type external-identifier +P3607 property_type external-identifier +P3608 property_type external-identifier +P3609 property_type external-identifier +P3610 property_type item +P3611 property_type external-identifier +P3612 property_type external-identifier +P3613 property_type external-identifier +P3614 property_type external-identifier +P3615 property_type external-identifier +P3616 property_type external-identifier +P3618 property_type quantity +P3619 property_type external-identifier +P3620 property_type external-identifier +P3621 property_type external-identifier +P3622 property_type external-identifier +P3623 property_type external-identifier +P3624 property_type external-identifier +P1409 property_type external-identifier +P1410 property_type quantity +P1411 property_type item +P1412 property_type item +P1414 property_type item +P1415 property_type external-identifier +P1416 property_type item +P1417 property_type external-identifier +P1418 property_type quantity +P1419 property_type item +P1420 property_type item +P1421 property_type url +P1422 property_type external-identifier +P1423 property_type item +P1424 property_type item +P1425 property_type item +P1427 property_type item +P1428 property_type external-identifier +P1429 property_type item +P1430 property_type external-identifier +P1431 property_type item +P1433 property_type item +P1434 property_type item +P1435 property_type item +P1436 property_type quantity +P1437 property_type item +P1438 property_type string +P1439 property_type external-identifier +P1440 property_type external-identifier +P1441 property_type item +P1442 property_type string 
+P1443 property_type item +P1444 property_type item +P1445 property_type item +P1446 property_type quantity +P1447 property_type external-identifier +P1448 property_type monolingualtext +P1449 property_type monolingualtext +P1450 property_type monolingualtext +P1451 property_type monolingualtext +P1453 property_type external-identifier +P1454 property_type item +P1455 property_type item +P1456 property_type item +P1457 property_type quantity +P1458 property_type quantity +P1459 property_type external-identifier +P1460 property_type external-identifier +P1461 property_type string +P1462 property_type item +P1463 property_type external-identifier +P1464 property_type item +P1465 property_type item +P1466 property_type external-identifier +P1467 property_type external-identifier +P1468 property_type external-identifier +P1469 property_type external-identifier +P1470 property_type quantity +P1471 property_type string +P1472 property_type string +P1473 property_type external-identifier +P1474 property_type external-identifier +P1476 property_type monolingualtext +P1477 property_type monolingualtext +P1478 property_type item +P1479 property_type item +P1480 property_type item +P1481 property_type external-identifier +P1482 property_type url +P1483 property_type external-identifier +P1529 property_type external-identifier +P1531 property_type item +P1532 property_type item +P1533 property_type item +P1534 property_type item +P1535 property_type item +P1536 property_type item +P1537 property_type item +P1538 property_type quantity +P1539 property_type quantity +P1540 property_type quantity +P1541 property_type external-identifier +P1542 property_type item +P1543 property_type string +P1544 property_type external-identifier +P1545 property_type string +P1546 property_type item +P1547 property_type item +P1548 property_type quantity +P1549 property_type monolingualtext +P1550 property_type external-identifier +P1551 property_type external-identifier +P1552 property_type item +P1553 property_type external-identifier +P1554 property_type external-identifier +P1555 property_type external-identifier +P1556 property_type external-identifier +P1557 property_type item +P1558 property_type item +P1559 property_type monolingualtext +P5168 property_type monolingualtext +P5169 property_type external-identifier +P5170 property_type external-identifier +P5171 property_type external-identifier +P5172 property_type external-identifier +P5173 property_type external-identifier +P5174 property_type external-identifier +P5175 property_type external-identifier +P5176 property_type external-identifier +P5177 property_type external-identifier +P5178 property_type url +P5179 property_type external-identifier +P5180 property_type external-identifier +P5181 property_type external-identifier +P5182 property_type external-identifier +P5183 property_type external-identifier +P5184 property_type external-identifier +P5185 property_type item +P5186 property_type item +P5187 property_type monolingualtext +P5188 property_type string +P5189 property_type string +P5191 property_type string +P5192 property_type string +P5193 property_type string +P5194 property_type item +P5195 property_type url +P5196 property_type item +P5197 property_type external-identifier +P5198 property_type external-identifier +P5199 property_type external-identifier +P5200 property_type external-identifier +P5201 property_type item +P5202 property_type item +P5203 property_type item +P5204 property_type time +P5205 property_type quantity +P5206 property_type 
item +P5207 property_type external-identifier +P5208 property_type external-identifier +P5209 property_type external-identifier +P5210 property_type external-identifier +P5211 property_type external-identifier +P5212 property_type external-identifier +P5213 property_type external-identifier +P5214 property_type external-identifier +P5215 property_type external-identifier +P5216 property_type external-identifier +P5217 property_type external-identifier +P5218 property_type external-identifier +P5219 property_type external-identifier +P5220 property_type external-identifier +P5221 property_type external-identifier +P5222 property_type external-identifier +P5223 property_type external-identifier +P5224 property_type external-identifier +P5225 property_type external-identifier +P5226 property_type external-identifier +P5227 property_type external-identifier +P5229 property_type external-identifier +P5230 property_type quantity +P5231 property_type external-identifier +P5232 property_type external-identifier +P5233 property_type external-identifier +P5234 property_type external-identifier +P5235 property_type external-identifier +P5236 property_type item +P5237 property_type item +P5238 property_type string +P5239 property_type external-identifier +P5240 property_type external-identifier +P5241 property_type external-identifier +P5242 property_type external-identifier +P5243 property_type external-identifier +P5244 property_type item +P5245 property_type external-identifier +P5246 property_type external-identifier +P5247 property_type external-identifier +P5248 property_type item +P5249 property_type item +P5250 property_type external-identifier +P5251 property_type external-identifier +P5252 property_type string +P5253 property_type external-identifier +P5254 property_type external-identifier +P5255 property_type external-identifier +P5256 property_type external-identifier +P5257 property_type external-identifier +P5258 property_type external-identifier +P5259 property_type external-identifier +P5260 property_type external-identifier +P5261 property_type external-identifier +P5262 property_type external-identifier +P5263 property_type external-identifier +P5264 property_type external-identifier +P5265 property_type external-identifier +P5266 property_type external-identifier +P5267 property_type external-identifier +P5268 property_type external-identifier +P5269 property_type external-identifier +P5692 property_type string +P5693 property_type external-identifier +P5694 property_type external-identifier +P5695 property_type external-identifier +P5696 property_type external-identifier +P5698 property_type external-identifier +P5699 property_type external-identifier +P5700 property_type external-identifier +P5703 property_type string +P5704 property_type external-identifier +P5705 property_type external-identifier +P5706 property_type quantity +P5707 property_type item +P5708 property_type quantity +P5709 property_type quantity +P5710 property_type external-identifier +P5711 property_type external-identifier +P5712 property_type external-identifier +P5713 property_type item +P5714 property_type external-identifier +P5715 property_type url +P5716 property_type external-identifier +P5717 property_type external-identifier +P5718 property_type external-identifier +P5719 property_type external-identifier +P5720 property_type external-identifier +P5721 property_type external-identifier +P5722 property_type external-identifier +P5723 property_type external-identifier +P5724 property_type 
external-identifier +P5725 property_type external-identifier +P5726 property_type external-identifier +P5727 property_type external-identifier +P5731 property_type external-identifier +P5732 property_type external-identifier +P5733 property_type external-identifier +P5734 property_type external-identifier +P5735 property_type external-identifier +P5736 property_type external-identifier +P5737 property_type external-identifier +P5738 property_type external-identifier +P5739 property_type external-identifier +P5740 property_type external-identifier +P5742 property_type external-identifier +P5743 property_type external-identifier +P5744 property_type external-identifier +P5745 property_type external-identifier +P5746 property_type external-identifier +P5747 property_type external-identifier +P5748 property_type external-identifier +P5749 property_type external-identifier +P5750 property_type external-identifier +P5752 property_type external-identifier +P5753 property_type item +P5755 property_type external-identifier +P5756 property_type external-identifier +P5757 property_type external-identifier +P5758 property_type external-identifier +P5759 property_type external-identifier +P5760 property_type external-identifier +P5761 property_type external-identifier +P5762 property_type external-identifier +P5763 property_type external-identifier +P5764 property_type external-identifier +P5765 property_type external-identifier +P5768 property_type external-identifier +P5769 property_type item +P5770 property_type external-identifier +P5771 property_type external-identifier +P5772 property_type external-identifier +P5773 property_type external-identifier +P5774 property_type external-identifier +P5775 property_type string +P5776 property_type external-identifier +P5777 property_type external-identifier +P5778 property_type string +P5779 property_type external-identifier +P5780 property_type external-identifier +P5782 property_type external-identifier +P5783 property_type external-identifier +P5784 property_type external-identifier +P5785 property_type external-identifier +P5786 property_type external-identifier +P5787 property_type external-identifier +P5788 property_type external-identifier +P5789 property_type external-identifier +P5790 property_type external-identifier +P5791 property_type external-identifier +P5792 property_type external-identifier +P5793 property_type external-identifier +P5794 property_type external-identifier +P5795 property_type external-identifier +P5796 property_type external-identifier +P5797 property_type external-identifier +P5798 property_type string +P5799 property_type external-identifier +P5800 property_type item +P5801 property_type external-identifier +P5802 property_type item +P5803 property_type external-identifier +P3625 property_type string +P3626 property_type external-identifier +P3627 property_type external-identifier +P3628 property_type external-identifier +P3629 property_type quantity +P3630 property_type external-identifier +P3631 property_type external-identifier +P3632 property_type external-identifier +P3633 property_type external-identifier +P3634 property_type external-identifier +P3635 property_type external-identifier +P3636 property_type external-identifier +P3637 property_type external-identifier +P3638 property_type external-identifier +P3639 property_type external-identifier +P3640 property_type external-identifier +P3641 property_type external-identifier +P3642 property_type external-identifier +P3643 property_type item +P3644 property_type 
external-identifier
+P3645	property_type	external-identifier
+P3646	property_type	external-identifier
+P3647	property_type	external-identifier
+P3648	property_type	item
+P3650	property_type	item
+P3651	property_type	external-identifier
+P3652	property_type	external-identifier
+P3653	property_type	external-identifier
+P3654	property_type	external-identifier
+P3655	property_type	external-identifier
+P3656	property_type	external-identifier
+P3657	property_type	external-identifier
+P3658	property_type	external-identifier
+P3659	property_type	external-identifier
+P3660	property_type	external-identifier
+P3661	property_type	external-identifier
+P3662	property_type	external-identifier
+P3663	property_type	external-identifier
+P3664	property_type	external-identifier
+P3665	property_type	external-identifier
+P3666	property_type	external-identifier
+P3667	property_type	external-identifier
+P3668	property_type	external-identifier
+P3669	property_type	external-identifier
+P3670	property_type	external-identifier
+P3671	property_type	external-identifier
+P3672	property_type	external-identifier
+P3673	property_type	external-identifier
+P3674	property_type	external-identifier
+P3675	property_type	external-identifier
+P3676	property_type	external-identifier
+P3677	property_type	external-identifier
+P3678	property_type	external-identifier
+P3679	property_type	item
+P3680	property_type	item
+P3681	property_type	external-identifier
+P3682	property_type	external-identifier
+P3683	property_type	external-identifier
+P3684	property_type	external-identifier
+P3685	property_type	external-identifier
+P3686	property_type	external-identifier
+P3687	property_type	external-identifier
+P3689	property_type	external-identifier
+P3690	property_type	external-identifier
+P3691	property_type	external-identifier
+P3692	property_type	external-identifier
+P3693	property_type	external-identifier
+P3694	property_type	external-identifier
+P3695	property_type	external-identifier
+P3696	property_type	external-identifier
+P3697	property_type	external-identifier
+P3698	property_type	external-identifier
+P3699	property_type	external-identifier
+P3700	property_type	external-identifier
+P3701	property_type	item
+P3702	property_type	external-identifier
+P3703	property_type	external-identifier
+P3704	property_type	external-identifier
+P3705	property_type	external-identifier
+P3706	property_type	external-identifier
+P3707	property_type	external-identifier
+P3708	property_type	external-identifier
+P3709	property_type	item
+P3710	property_type	external-identifier
+P3711	property_type	external-identifier
+P3712	property_type	item
+P3713	property_type	item
+P3714	property_type	external-identifier
+P3715	property_type	external-identifier
+P3716	property_type	item
+P3717	property_type	external-identifier
+P3718	property_type	external-identifier
+P3719	property_type	item
+P3720	property_type	external-identifier
+P3721	property_type	string
+P3723	property_type	external-identifier
+P3724	property_type	external-identifier
+P3725	property_type	external-identifier
+P3726	property_type	external-identifier
+P3727	property_type	external-identifier
+P4748	property_type	external-identifier
+P4749	property_type	external-identifier
+P4750	property_type	external-identifier
+P4751	property_type	external-identifier
+P4752	property_type	external-identifier
+P4753	property_type	external-identifier
+P4754	property_type	external-identifier
+P4755	property_type	external-identifier
+P4756	property_type	external-identifier
+P4757	property_type	external-identifier
+P4758	property_type	external-identifier
+P4759	property_type	external-identifier
+P4760	property_type	external-identifier
+P4761	property_type	external-identifier
+P4762	property_type	external-identifier
+P4763	property_type	external-identifier
+P4764	property_type	external-identifier
+P4765	property_type	url
+P4766	property_type	external-identifier
+P4768	property_type	external-identifier
+P4769	property_type	external-identifier
+P4770	property_type	item
+P4771	property_type	external-identifier
+P4772	property_type	external-identifier
+P4773	property_type	external-identifier
+P4774	property_type	item
+P4775	property_type	string
+P4776	property_type	string
+P4777	property_type	item
+P4778	property_type	external-identifier
+P4779	property_type	external-identifier
+P4780	property_type	external-identifier
+P4781	property_type	external-identifier
+P4782	property_type	external-identifier
+P4783	property_type	external-identifier
+P4784	property_type	external-identifier
+P4785	property_type	external-identifier
+P4786	property_type	external-identifier
+P4787	property_type	external-identifier
+P4788	property_type	item
+P4789	property_type	external-identifier
+P4790	property_type	external-identifier
+P4791	property_type	item
+P4792	property_type	item
+P4793	property_type	external-identifier
+P4794	property_type	item
+P4795	property_type	external-identifier
+P4796	property_type	external-identifier
+P4797	property_type	external-identifier
+P4798	property_type	external-identifier
+P4799	property_type	external-identifier
+P4800	property_type	external-identifier
+P4801	property_type	external-identifier
+P4802	property_type	external-identifier
+P4803	property_type	external-identifier
+P4804	property_type	external-identifier
+P4805	property_type	item
+P4806	property_type	external-identifier
+P4807	property_type	external-identifier
+P4808	property_type	external-identifier
+P4809	property_type	item
+P4810	property_type	item
+P4811	property_type	external-identifier
+P4812	property_type	external-identifier
+P4813	property_type	external-identifier
+P4814	property_type	external-identifier
+P4815	property_type	quantity
+P4816	property_type	external-identifier
+P4818	property_type	external-identifier
+P4819	property_type	external-identifier
+P4820	property_type	external-identifier
+P4821	property_type	external-identifier
+P4822	property_type	external-identifier
+P4823	property_type	external-identifier
+P4824	property_type	external-identifier
+P4825	property_type	quantity
+P4826	property_type	quantity
+P4827	property_type	external-identifier
+P4829	property_type	external-identifier
+P4830	property_type	external-identifier
+P4831	property_type	external-identifier
+P4832	property_type	external-identifier
+P4833	property_type	external-identifier
+P4834	property_type	external-identifier
+P4835	property_type	external-identifier
+P4836	property_type	external-identifier
+P4837	property_type	string
+P4838	property_type	external-identifier
+P4839	property_type	external-identifier
+P4840	property_type	external-identifier
+P4841	property_type	quantity
+P4842	property_type	external-identifier
+P4843	property_type	item
+P4844	property_type	item
+P4845	property_type	external-identifier
+P4846	property_type	external-identifier
+P4847	property_type	external-identifier
+P4848	property_type	external-identifier
+P4849	property_type	external-identifier
+P4850	property_type	item
+P5270	property_type	external-identifier
+P5271	property_type	external-identifier
+P5272	property_type	external-identifier
+P5273	property_type	external-identifier
+P5274	property_type	external-identifier
+P5275	property_type	external-identifier
+P5276	property_type	string
+P5277	property_type	item
+P5278	property_type	item
+P5279	property_type	string
+P5280	property_type	item
+P5281	property_type	quantity
+P5282	property_type	url
+P5283	property_type	external-identifier
+P5284	property_type	external-identifier
+P5285	property_type	external-identifier
+P5286	property_type	string
+P5287	property_type	external-identifier
+P5288	property_type	external-identifier
+P5289	property_type	external-identifier
+P5290	property_type	external-identifier
+P5291	property_type	external-identifier
+P5292	property_type	external-identifier
+P5293	property_type	external-identifier
+P5294	property_type	external-identifier
+P5295	property_type	external-identifier
+P5296	property_type	external-identifier
+P5297	property_type	external-identifier
+P5298	property_type	external-identifier
+P5299	property_type	external-identifier
+P5300	property_type	external-identifier
+P5301	property_type	external-identifier
+P5302	property_type	external-identifier
+P5303	property_type	external-identifier
+P5304	property_type	item
+P5305	property_type	url
+P5306	property_type	external-identifier
+P5307	property_type	item
+P5308	property_type	external-identifier
+P5309	property_type	external-identifier
+P5310	property_type	external-identifier
+P5311	property_type	external-identifier
+P5312	property_type	external-identifier
+P5313	property_type	external-identifier
+P5314	property_type	item
+P5315	property_type	external-identifier
+P5316	property_type	external-identifier
+P5317	property_type	item
+P5318	property_type	external-identifier
+P5319	property_type	external-identifier
+P5320	property_type	external-identifier
+P5321	property_type	external-identifier
+P5323	property_type	item
+P5324	property_type	external-identifier
+P5325	property_type	external-identifier
+P5326	property_type	item
+P5327	property_type	external-identifier
+P5328	property_type	item
+P5329	property_type	external-identifier
+P5330	property_type	item
+P5331	property_type	external-identifier
+P5332	property_type	external-identifier
+P5333	property_type	external-identifier
+P5334	property_type	external-identifier
+P5335	property_type	external-identifier
+P5336	property_type	external-identifier
+P5337	property_type	external-identifier
+P5338	property_type	external-identifier
+P5339	property_type	external-identifier
+P5340	property_type	external-identifier
+P5341	property_type	external-identifier
+P5343	property_type	external-identifier
+P5344	property_type	external-identifier
+P5345	property_type	external-identifier
+P5346	property_type	external-identifier
+P5348	property_type	quantity
+P5349	property_type	quantity
+P5350	property_type	string
+P5351	property_type	string
+P5352	property_type	string
+P5353	property_type	item
+P5354	property_type	external-identifier
+P5355	property_type	external-identifier
+P5356	property_type	external-identifier
+P5357	property_type	external-identifier
+P5358	property_type	external-identifier
+P5359	property_type	external-identifier
+P5360	property_type	external-identifier
+P5361	property_type	external-identifier
+P5362	property_type	external-identifier
+P5363	property_type	external-identifier
+P5364	property_type	external-identifier
+P5365	property_type	external-identifier
+P5366	property_type	external-identifier
+P5368	property_type	external-identifier
+P5369	property_type	external-identifier
+P5370	property_type	external-identifier
+P5371	property_type	external-identifier
+P5372	property_type	external-identifier
+P5373	property_type	external-identifier
+P1560	property_type	item
+P1561	property_type	quantity
+P1562	property_type	external-identifier
+P1563	property_type	external-identifier
+P1564	property_type	external-identifier
+P1565	property_type	external-identifier
+P1566	property_type	external-identifier
+P1567	property_type	external-identifier
+P1568	property_type	item
+P1571	property_type	item
+P1573	property_type	external-identifier
+P1574	property_type	item
+P1575	property_type	external-identifier
+P1576	property_type	item
+P1577	property_type	external-identifier
+P1578	property_type	external-identifier
+P1579	property_type	external-identifier
+P1580	property_type	external-identifier
+P1581	property_type	url
+P1582	property_type	item
+P1583	property_type	external-identifier
+P1584	property_type	external-identifier
+P1585	property_type	external-identifier
+P1586	property_type	external-identifier
+P1587	property_type	external-identifier
+P1588	property_type	string
+P1589	property_type	item
+P1590	property_type	quantity
+P1591	property_type	item
+P1592	property_type	item
+P1593	property_type	item
+P1594	property_type	item
+P1595	property_type	item
+P1596	property_type	item
+P1598	property_type	item
+P1599	property_type	external-identifier
+P1600	property_type	external-identifier
+P1601	property_type	external-identifier
+P1602	property_type	external-identifier
+P1603	property_type	quantity
+P1604	property_type	item
+P1605	property_type	item
+P1606	property_type	item
+P1607	property_type	external-identifier
+P1608	property_type	external-identifier
+P1609	property_type	external-identifier
+P1610	property_type	external-identifier
+P1611	property_type	item
+P1612	property_type	string
+P1613	property_type	url
+P1614	property_type	external-identifier
+P1615	property_type	external-identifier
+P1616	property_type	external-identifier
+P1617	property_type	external-identifier
+P1618	property_type	string
+P1619	property_type	time
+P1620	property_type	item
+P1621	property_type	string
+P1622	property_type	item
+P1624	property_type	external-identifier
+P1625	property_type	item
+P1626	property_type	external-identifier
+P1627	property_type	external-identifier
+P1628	property_type	url
+P1629	property_type	item
+P1630	property_type	string
+P1631	property_type	external-identifier
+P1632	property_type	external-identifier
+P1635	property_type	monolingualtext
+P1636	property_type	time
+P1637	property_type	item
+P1638	property_type	monolingualtext
+P1639	property_type	item
+P1640	property_type	item
+P1641	property_type	quantity
+P1642	property_type	item
+P1643	property_type	item
+P1644	property_type	external-identifier
+P1645	property_type	external-identifier
+P1647	property_type	string
+P1648	property_type	external-identifier
+P1649	property_type	external-identifier
+P1650	property_type	external-identifier
+P1651	property_type	external-identifier
+P1652	property_type	item
+P1653	property_type	external-identifier
+P1654	property_type	item
+P1656	property_type	item
+P1657	property_type	item
+P1659	property_type	string
+P1660	property_type	item
+P1661	property_type	quantity
+P1662	property_type	external-identifier
+P1663	property_type	external-identifier
+P1664	property_type	external-identifier
+P1665	property_type	external-identifier
+P1666	property_type	external-identifier
+P1667	property_type	external-identifier
+P1668	property_type	external-identifier
+P1669	property_type	external-identifier
+P3728	property_type	external-identifier
+P3729	property_type	item
+P3730	property_type	item
+P3731	property_type	external-identifier
+P3732	property_type	external-identifier
+P3733	property_type	external-identifier
+P3734	property_type	item
+P3735	property_type	external-identifier
+P3736	property_type	external-identifier
+P3737	property_type	quantity
+P3738	property_type	quantity
+P3739	property_type	item
+P3740	property_type	quantity
+P3741	property_type	item
+P3742	property_type	external-identifier
+P3743	property_type	external-identifier
+P3744	property_type	quantity
+P3745	property_type	external-identifier
+P3746	property_type	external-identifier
+P3747	property_type	external-identifier
+P3748	property_type	external-identifier
+P3749	property_type	external-identifier
+P3750	property_type	external-identifier
+P3751	property_type	external-identifier
+P3752	property_type	string
+P3753	property_type	string
+P3754	property_type	string
+P3755	property_type	string
+P3756	property_type	string
+P3757	property_type	string
+P3758	property_type	external-identifier
+P3759	property_type	external-identifier
+P3760	property_type	external-identifier
+P3761	property_type	string
+P3762	property_type	external-identifier
+P3763	property_type	external-identifier
+P3764	property_type	item
+P3765	property_type	external-identifier
+P3766	property_type	external-identifier
+P3767	property_type	external-identifier
+P3768	property_type	external-identifier
+P3769	property_type	external-identifier
+P3770	property_type	external-identifier
+P3771	property_type	item
+P3772	property_type	item
+P3773	property_type	item
+P3774	property_type	item
+P3775	property_type	item
+P3776	property_type	item
+P3777	property_type	item
+P3778	property_type	item
+P3779	property_type	item
+P3780	property_type	item
+P3781	property_type	item
+P3782	property_type	external-identifier
+P3783	property_type	external-identifier
+P3784	property_type	external-identifier
+P3785	property_type	external-identifier
+P3786	property_type	external-identifier
+P3787	property_type	external-identifier
+P3788	property_type	external-identifier
+P3789	property_type	external-identifier
+P3790	property_type	external-identifier
+P3791	property_type	external-identifier
+P3792	property_type	quantity
+P3793	property_type	string
+P3794	property_type	external-identifier
+P3795	property_type	external-identifier
+P3796	property_type	external-identifier
+P3797	property_type	external-identifier
+P3798	property_type	external-identifier
+P3799	property_type	external-identifier
+P3800	property_type	external-identifier
+P3801	property_type	external-identifier
+P3802	property_type	external-identifier
+P3803	property_type	item
+P3804	property_type	external-identifier
+P3805	property_type	external-identifier
+P3806	property_type	external-identifier
+P3807	property_type	external-identifier
+P3808	property_type	external-identifier
+P3809	property_type	external-identifier
+P3810	property_type	external-identifier
+P3811	property_type	external-identifier
+P3812	property_type	external-identifier
+P3813	property_type	external-identifier
+P3814	property_type	external-identifier
+P3815	property_type	item
+P3816	property_type	item
+P3817	property_type	external-identifier
+P3818	property_type	item
+P3819	property_type	external-identifier
+P3820	property_type	external-identifier
+P3821	property_type	external-identifier
+P3822	property_type	item
+P3823	property_type	item
+P3824	property_type	external-identifier
+P3825	property_type	external-identifier
+P3826	property_type	external-identifier
+P3827	property_type	external-identifier
+P5804	property_type	item
+P5805	property_type	item
+P5806	property_type	external-identifier
+P5807	property_type	external-identifier
+P5808	property_type	external-identifier
+P5809	property_type	external-identifier
+P5810	property_type	string
+P5811	property_type	quantity
+P5813	property_type	external-identifier
+P5814	property_type	external-identifier
+P5815	property_type	external-identifier
+P5816	property_type	item
+P5817	property_type	item
+P5818	property_type	external-identifier
+P5819	property_type	external-identifier
+P5820	property_type	external-identifier
+P5821	property_type	external-identifier
+P5822	property_type	quantity
+P5823	property_type	external-identifier
+P5824	property_type	item
+P5825	property_type	string
+P5826	property_type	item
+P5827	property_type	external-identifier
+P5828	property_type	item
+P5829	property_type	external-identifier
+P5830	property_type	string
+P5831	property_type	monolingualtext
+P5832	property_type	item
+P5833	property_type	external-identifier
+P5834	property_type	external-identifier
+P5835	property_type	external-identifier
+P5836	property_type	external-identifier
+P5838	property_type	external-identifier
+P5839	property_type	external-identifier
+P5840	property_type	external-identifier
+P5841	property_type	item
+P5842	property_type	external-identifier
+P5843	property_type	external-identifier
+P5844	property_type	external-identifier
+P5845	property_type	external-identifier
+P5846	property_type	external-identifier
+P5847	property_type	external-identifier
+P5848	property_type	external-identifier
+P5849	property_type	external-identifier
+P5851	property_type	external-identifier
+P5852	property_type	item
+P5858	property_type	string
+P5859	property_type	external-identifier
+P5860	property_type	external-identifier
+P5862	property_type	external-identifier
+P5863	property_type	external-identifier
+P5864	property_type	external-identifier
+P5865	property_type	external-identifier
+P5866	property_type	external-identifier
+P5867	property_type	external-identifier
+P5868	property_type	external-identifier
+P5869	property_type	item
+P5870	property_type	external-identifier
+P5871	property_type	external-identifier
+P5872	property_type	item
+P5873	property_type	item
+P5874	property_type	external-identifier
+P5875	property_type	external-identifier
+P5876	property_type	external-identifier
+P5877	property_type	external-identifier
+P5878	property_type	string
+P5879	property_type	external-identifier
+P5880	property_type	item
+P5881	property_type	item
+P5882	property_type	external-identifier
+P5883	property_type	external-identifier
+P5884	property_type	external-identifier
+P5885	property_type	external-identifier
+P5886	property_type	item
+P5887	property_type	external-identifier
+P5888	property_type	external-identifier
+P5890	property_type	external-identifier
+P5891	property_type	external-identifier
+P5892	property_type	external-identifier
+P5893	property_type	quantity
+P5894	property_type	quantity
+P5895	property_type	quantity
+P5896	property_type	quantity
+P5897	property_type	quantity
+P5898	property_type	quantity
+P5899	property_type	quantity
+P5900	property_type	quantity
+P5901	property_type	string
+P5902	property_type	external-identifier
+P5903	property_type	external-identifier
+P5904	property_type	external-identifier
+P5905	property_type	external-identifier
+P5906	property_type	external-identifier
+P5908	property_type	external-identifier
+P5909	property_type	external-identifier
+P5910	property_type	string
+P5911	property_type	item
+P5912	property_type	external-identifier
+P5913	property_type	item
+P5914	property_type	external-identifier
+P1670	property_type	external-identifier
+P1671	property_type	string
+P1672	property_type	item
+P1673	property_type	string
+P1674	property_type	quantity
+P1675	property_type	quantity
+P1676	property_type	quantity
+P1677	property_type	item
+P1678	property_type	item
+P1679	property_type	external-identifier
+P1680	property_type	monolingualtext
+P1683	property_type	monolingualtext
+P1684	property_type	monolingualtext
+P1685	property_type	string
+P1686	property_type	item
+P1687	property_type	string
+P1689	property_type	quantity
+P1690	property_type	external-identifier
+P1691	property_type	external-identifier
+P1692	property_type	string
+P1693	property_type	external-identifier
+P1694	property_type	external-identifier
+P1695	property_type	external-identifier
+P1696	property_type	string
+P1697	property_type	quantity
+P1699	property_type	external-identifier
+P1700	property_type	external-identifier
+P1702	property_type	external-identifier
+P1703	property_type	item
+P1704	property_type	item
+P1705	property_type	monolingualtext
+P1706	property_type	item
+P1707	property_type	external-identifier
+P1708	property_type	external-identifier
+P1709	property_type	url
+P1710	property_type	external-identifier
+P1711	property_type	external-identifier
+P1712	property_type	external-identifier
+P1713	property_type	url
+P1714	property_type	external-identifier
+P1715	property_type	external-identifier
+P1716	property_type	item
+P1717	property_type	external-identifier
+P1721	property_type	string
+P1725	property_type	quantity
+P1726	property_type	external-identifier
+P1727	property_type	external-identifier
+P1728	property_type	external-identifier
+P1729	property_type	external-identifier
+P1730	property_type	external-identifier
+P1731	property_type	item
+P1732	property_type	external-identifier
+P1733	property_type	external-identifier
+P1734	property_type	time
+P1735	property_type	external-identifier
+P1736	property_type	external-identifier
+P1738	property_type	external-identifier
+P1739	property_type	external-identifier
+P1740	property_type	item
+P1741	property_type	external-identifier
+P1743	property_type	external-identifier
+P1744	property_type	external-identifier
+P1745	property_type	external-identifier
+P1746	property_type	external-identifier
+P1747	property_type	external-identifier
+P1748	property_type	string
+P1749	property_type	external-identifier
+P1750	property_type	item
+P1751	property_type	external-identifier
+P1752	property_type	quantity
+P1753	property_type	item
+P1754	property_type	item
+P1755	property_type	external-identifier
+P1760	property_type	external-identifier
+P1761	property_type	external-identifier
+P1762	property_type	string
+P1763	property_type	external-identifier
+P1764	property_type	external-identifier
+P1766	property_type	string
+P1769	property_type	external-identifier
+P1770	property_type	external-identifier
+P1771	property_type	external-identifier
+P1772	property_type	external-identifier
+P1774	property_type	item
+P1775	property_type	item
+P1776	property_type	item
+P1777	property_type	item
+P1778	property_type	item
+P1779	property_type	item
+P1780	property_type	item
+P1782	property_type	string
+P1785	property_type	string
+P1786	property_type	string
+P1787	property_type	string
+P1788	property_type	external-identifier
+P1789	property_type	item
+P1791	property_type	item
+P1792	property_type	item
+P1793	property_type	string
+P1794	property_type	external-identifier
+P7486	property_type	item
+P7487	property_type	external-identifier
+P7488	property_type	external-identifier
+P7489	property_type	external-identifier
+P7490	property_type	external-identifier
+P7491	property_type	external-identifier
+P7492	property_type	external-identifier
+P7493	property_type	external-identifier
+P7494	property_type	external-identifier
+P7495	property_type	external-identifier
+P7496	property_type	external-identifier
+P7497	property_type	external-identifier
+P7498	property_type	external-identifier
+P7499	property_type	external-identifier
+P7500	property_type	item
+P7501	property_type	item
+P7502	property_type	external-identifier
+P7503	property_type	external-identifier
+P7504	property_type	external-identifier
+P7505	property_type	external-identifier
+P7506	property_type	external-identifier
+P7507	property_type	external-identifier
+P7508	property_type	item
+P7509	property_type	external-identifier
+P7510	property_type	url
+P7511	property_type	external-identifier
+P7512	property_type	external-identifier
+P7513	property_type	external-identifier
+P7514	property_type	item
+P7515	property_type	external-identifier
+P7516	property_type	external-identifier
+P7517	property_type	external-identifier
+P7518	property_type	external-identifier
+P7519	property_type	external-identifier
+P7520	property_type	external-identifier
+P7521	property_type	external-identifier
+P7522	property_type	external-identifier
+P7523	property_type	external-identifier
+P7524	property_type	external-identifier
+P7525	property_type	external-identifier
+P7526	property_type	external-identifier
+P7527	property_type	quantity
+P7528	property_type	item
+P7529	property_type	external-identifier
+P7530	property_type	external-identifier
+P7531	property_type	external-identifier
+P7532	property_type	string
+P7533	property_type	external-identifier
+P7534	property_type	external-identifier
+P7535	property_type	monolingualtext
+P7536	property_type	external-identifier
+P7537	property_type	external-identifier
+P7538	property_type	external-identifier
+P7539	property_type	external-identifier
+P7540	property_type	external-identifier
+P7541	property_type	external-identifier
+P7542	property_type	external-identifier
+P7543	property_type	external-identifier
+P7544	property_type	external-identifier
+P7545	property_type	external-identifier
+P7546	property_type	external-identifier
+P7547	property_type	external-identifier
+P7548	property_type	external-identifier
+P7549	property_type	external-identifier
+P7550	property_type	external-identifier
+P7551	property_type	external-identifier
+P7552	property_type	external-identifier
+P7553	property_type	external-identifier
+P7554	property_type	external-identifier
+P7555	property_type	external-identifier
+P7556	property_type	external-identifier
+P7558	property_type	external-identifier
+P7559	property_type	external-identifier
+P7560	property_type	external-identifier
+P7561	property_type	item
+P7562	property_type	external-identifier
+P7563	property_type	external-identifier
+P7564	property_type	external-identifier
+P7565	property_type	external-identifier
+P7566	property_type	external-identifier
+P7567	property_type	external-identifier
+P7568	property_type	external-identifier
+P7569	property_type	url
+P7570	property_type	external-identifier
+P7571	property_type	external-identifier
+P7572	property_type	external-identifier
+P7573	property_type	item
+P7574	property_type	external-identifier
+P7575	property_type	external-identifier
+P7576	property_type	external-identifier
+P7577	property_type	external-identifier
+P7578	property_type	external-identifier
+P7579	property_type	external-identifier
+P7580	property_type	external-identifier
+P7581	property_type	string
+P7582	property_type	item
+P7583	property_type	external-identifier
+P7584	property_type	quantity
+P7585	property_type	external-identifier
+P7586	property_type	external-identifier
+P3828	property_type	item
+P3829	property_type	external-identifier
+P3830	property_type	external-identifier
+P3831	property_type	item
+P3832	property_type	external-identifier
+P3833	property_type	item
+P3834	property_type	item
+P3835	property_type	external-identifier
+P3836	property_type	external-identifier
+P3837	property_type	external-identifier
+P3838	property_type	external-identifier
+P3839	property_type	external-identifier
+P3840	property_type	quantity
+P3841	property_type	external-identifier
+P3842	property_type	item
+P3843	property_type	external-identifier
+P3844	property_type	external-identifier
+P3845	property_type	external-identifier
+P3846	property_type	external-identifier
+P3847	property_type	external-identifier
+P3848	property_type	external-identifier
+P3849	property_type	external-identifier
+P3850	property_type	external-identifier
+P3851	property_type	external-identifier
+P3852	property_type	external-identifier
+P3853	property_type	external-identifier
+P3854	property_type	external-identifier
+P3855	property_type	external-identifier
+P3856	property_type	external-identifier
+P3857	property_type	external-identifier
+P3858	property_type	item
+P3859	property_type	external-identifier
+P3860	property_type	external-identifier
+P3861	property_type	external-identifier
+P3862	property_type	external-identifier
+P3863	property_type	external-identifier
+P3864	property_type	quantity
+P3865	property_type	item
+P3866	property_type	external-identifier
+P3867	property_type	external-identifier
+P3868	property_type	external-identifier
+P3869	property_type	external-identifier
+P3870	property_type	external-identifier
+P3871	property_type	item
+P3872	property_type	quantity
+P3874	property_type	external-identifier
+P3875	property_type	external-identifier
+P3876	property_type	item
+P3877	property_type	external-identifier
+P3878	property_type	string
+P3879	property_type	string
+P3880	property_type	string
+P3881	property_type	external-identifier
+P3882	property_type	external-identifier
+P3883	property_type	external-identifier
+P3884	property_type	external-identifier
+P3885	property_type	external-identifier
+P3886	property_type	quantity
+P3887	property_type	external-identifier
+P3888	property_type	external-identifier
+P3889	property_type	external-identifier
+P3890	property_type	external-identifier
+P3891	property_type	quantity
+P3892	property_type	external-identifier
+P3893	property_type	time
+P3894	property_type	external-identifier
+P3895	property_type	external-identifier
+P3896	property_type	string
+P3897	property_type	external-identifier
+P3898	property_type	external-identifier
+P3899	property_type	external-identifier
+P3900	property_type	external-identifier
+P3901	property_type	external-identifier
+P3902	property_type	item
+P3903	property_type	string
+P3904	property_type	external-identifier
+P3906	property_type	external-identifier
+P3907	property_type	external-identifier
+P3908	property_type	external-identifier
+P3909	property_type	monolingualtext
+P3910	property_type	external-identifier
+P3911	property_type	external-identifier
+P3912	property_type	item
+P3913	property_type	external-identifier
+P3914	property_type	external-identifier
+P3915	property_type	external-identifier
+P3916	property_type	external-identifier
+P3917	property_type	quantity
+P3918	property_type	external-identifier
+P3919	property_type	item
+P3920	property_type	external-identifier
+P3921	property_type	string
+P3922	property_type	string
+P3923	property_type	external-identifier
+P3924	property_type	external-identifier
+P3925	property_type	external-identifier
+P3926	property_type	external-identifier
+P3927	property_type	external-identifier
+P3928	property_type	external-identifier
+P3929	property_type	external-identifier
+P6437	property_type	item
+P6438	property_type	quantity
+P6439	property_type	item
+P6440	property_type	item
+P6441	property_type	external-identifier
+P6442	property_type	external-identifier
+P6443	property_type	external-identifier
+P6444	property_type	external-identifier
+P6445	property_type	external-identifier
+P6446	property_type	external-identifier
+P6447	property_type	external-identifier
+P6448	property_type	external-identifier
+P6449	property_type	external-identifier
+P6450	property_type	external-identifier
+P6451	property_type	external-identifier
+P6452	property_type	item
+P6453	property_type	external-identifier
+P6454	property_type	external-identifier
+P6455	property_type	external-identifier
+P6456	property_type	external-identifier
+P6457	property_type	external-identifier
+P6458	property_type	external-identifier
+P6459	property_type	external-identifier
+P6460	property_type	external-identifier
+P6461	property_type	external-identifier
+P6462	property_type	external-identifier
+P6463	property_type	external-identifier
+P6464	property_type	external-identifier
+P6465	property_type	external-identifier
+P6466	property_type	external-identifier
+P6467	property_type	external-identifier
+P6468	property_type	external-identifier
+P6469	property_type	external-identifier
+P6470	property_type	external-identifier
+P6471	property_type	external-identifier
+P6472	property_type	external-identifier
+P6473	property_type	external-identifier
+P6474	property_type	external-identifier
+P6475	property_type	external-identifier
+P6476	property_type	external-identifier
+P6477	property_type	item
+P6478	property_type	external-identifier
+P6479	property_type	external-identifier
+P6480	property_type	external-identifier
+P6481	property_type	external-identifier
+P6482	property_type	external-identifier
+P6483	property_type	external-identifier
+P6484	property_type	external-identifier
+P6485	property_type	external-identifier
+P6486	property_type	external-identifier
+P6487	property_type	external-identifier
+P6488	property_type	external-identifier
+P6489	property_type	external-identifier
+P6490	property_type	external-identifier
+P6491	property_type	external-identifier
+P6492	property_type	external-identifier
+P6493	property_type	external-identifier
+P6494	property_type	external-identifier
+P6495	property_type	external-identifier
+P6496	property_type	external-identifier
+P6497	property_type	quantity
+P6498	property_type	quantity
+P6499	property_type	quantity
+P6500	property_type	url
+P6501	property_type	external-identifier
+P6502	property_type	external-identifier
+P6503	property_type	external-identifier
+P6504	property_type	external-identifier
+P6506	property_type	external-identifier
+P6507	property_type	string
+P6509	property_type	quantity
+P6510	property_type	quantity
+P6512	property_type	external-identifier
+P6513	property_type	external-identifier
+P6514	property_type	external-identifier
+P6515	property_type	external-identifier
+P6516	property_type	external-identifier
+P6517	property_type	external-identifier
+P6518	property_type	external-identifier
+P6519	property_type	external-identifier
+P6520	property_type	external-identifier
+P6521	property_type	external-identifier
+P6524	property_type	item
+P6525	property_type	external-identifier
+P6526	property_type	external-identifier
+P6527	property_type	external-identifier
+P6528	property_type	external-identifier
+P6529	property_type	string
+P6530	property_type	item
+P6531	property_type	item
+P6532	property_type	item
+P6533	property_type	item
+P6534	property_type	item
+P6535	property_type	external-identifier
+P6536	property_type	external-identifier
+P6537	property_type	external-identifier
+P6538	property_type	external-identifier
+P6539	property_type	external-identifier
+P6540	property_type	item
+P6541	property_type	url
+P3930	property_type	external-identifier
+P3931	property_type	item
+P3932	property_type	external-identifier
+P3933	property_type	external-identifier
+P3934	property_type	quantity
+P3935	property_type	external-identifier
+P3936	property_type	external-identifier
+P3937	property_type	external-identifier
+P3938	property_type	item
+P3939	property_type	external-identifier
+P3940	property_type	external-identifier
+P3941	property_type	external-identifier
+P3942	property_type	external-identifier
+P3943	property_type	external-identifier
+P3944	property_type	external-identifier
+P3945	property_type	external-identifier
+P3946	property_type	external-identifier
+P3948	property_type	external-identifier
+P3949	property_type	external-identifier
+P3950	property_type	url
+P3951	property_type	external-identifier
+P3952	property_type	external-identifier
+P3953	property_type	external-identifier
+P3954	property_type	external-identifier
+P3955	property_type	external-identifier
+P3956	property_type	external-identifier
+P3957	property_type	external-identifier
+P3958	property_type	external-identifier
+P3959	property_type	external-identifier
+P3960	property_type	external-identifier
+P3961	property_type	external-identifier
+P3962	property_type	external-identifier
+P3963	property_type	external-identifier
+P3964	property_type	external-identifier
+P3965	property_type	external-identifier
+P3966	property_type	item
+P3967	property_type	item
+P3968	property_type	external-identifier
+P3969	property_type	item
+P3970	property_type	string
+P3971	property_type	external-identifier
+P3972	property_type	external-identifier
+P3973	property_type	external-identifier
+P3974	property_type	external-identifier
+P3975	property_type	item
+P3976	property_type	external-identifier
+P3977	property_type	external-identifier
+P3978	property_type	external-identifier
+P3979	property_type	external-identifier
+P3980	property_type	external-identifier
+P3981	property_type	external-identifier
+P3982	property_type	external-identifier
+P3983	property_type	quantity
+P3984	property_type	external-identifier
+P3985	property_type	item
+P3986	property_type	external-identifier
+P3987	property_type	external-identifier
+P3988	property_type	external-identifier
+P3989	property_type	item
+P3990	property_type	external-identifier
+P3991	property_type	external-identifier
+P3992	property_type	external-identifier
+P3993	property_type	external-identifier
+P3994	property_type	string
+P3995	property_type	external-identifier
+P3996	property_type	external-identifier
+P3997	property_type	external-identifier
+P3998	property_type	external-identifier
+P3999	property_type	time
+P4000	property_type	item
+P4001	property_type	url
+P4002	property_type	item
+P4003	property_type	external-identifier
+P4004	property_type	string
+P4005	property_type	external-identifier
+P4006	property_type	item
+P4007	property_type	external-identifier
+P4008	property_type	external-identifier
+P4009	property_type	external-identifier
+P4010	property_type	quantity
+P4011	property_type	external-identifier
+P4012	property_type	external-identifier
+P4013	property_type	external-identifier
+P4014	property_type	external-identifier
+P4015	property_type	external-identifier
+P4016	property_type	external-identifier
+P4017	property_type	external-identifier
+P4018	property_type	external-identifier
+P4019	property_type	external-identifier
+P4020	property_type	string
+P4021	property_type	external-identifier
+P4022	property_type	external-identifier
+P4023	property_type	external-identifier
+P4024	property_type	external-identifier
+P4025	property_type	external-identifier
+P4026	property_type	external-identifier
+P4027	property_type	external-identifier
+P4028	property_type	external-identifier
+P4029	property_type	external-identifier
+P4030	property_type	external-identifier
+P5915	property_type	external-identifier
+P5916	property_type	external-identifier
+P5917	property_type	external-identifier
+P5918	property_type	external-identifier
+P5920	property_type	string
+P5921	property_type	external-identifier
+P5922	property_type	external-identifier
+P5923	property_type	item
+P5925	property_type	external-identifier
+P5926	property_type	external-identifier
+P5927	property_type	external-identifier
+P5928	property_type	external-identifier
+P5929	property_type	quantity
+P5930	property_type	external-identifier
+P5931	property_type	external-identifier
+P5932	property_type	external-identifier
+P5933	property_type	external-identifier
+P5934	property_type	external-identifier
+P5935	property_type	external-identifier
+P5936	property_type	external-identifier
+P5937	property_type	external-identifier
+P5938	property_type	external-identifier
+P5939	property_type	external-identifier
+P5940	property_type	item
+P5941	property_type	external-identifier
+P5942	property_type	external-identifier
+P5944	property_type	external-identifier
+P5945	property_type	external-identifier
+P5946	property_type	external-identifier
+P5947	property_type	quantity
+P5948	property_type	external-identifier
+P5949	property_type	string
+P5950	property_type	external-identifier
+P5951	property_type	external-identifier
+P5952	property_type	external-identifier
+P5953	property_type	external-identifier
+P5954	property_type	external-identifier
+P5955	property_type	external-identifier
+P5956	property_type	external-identifier
+P5957	property_type	external-identifier
+P5958	property_type	external-identifier
+P5959	property_type	external-identifier
+P5960	property_type	external-identifier
+P5961	property_type	item
+P5962	property_type	string
+P5963	property_type	external-identifier
+P5964	property_type	external-identifier
+P5965	property_type	external-identifier
+P5966	property_type	external-identifier
+P5967	property_type	item
+P5968	property_type	external-identifier
+P5969	property_type	external-identifier
+P5970	property_type	item
+P5971	property_type	external-identifier
+P5972	property_type	string
+P5973	property_type	string
+P5974	property_type	string
+P5975	property_type	string
+P5976	property_type	string
+P5977	property_type	string
+P5978	property_type	string
+P5979	property_type	string
+P5980	property_type	string
+P5981	property_type	external-identifier
+P5982	property_type	quantity
+P5983	property_type	external-identifier
+P5984	property_type	external-identifier
+P5985	property_type	external-identifier
+P5986	property_type	external-identifier
+P5987	property_type	external-identifier
+P5988	property_type	external-identifier
+P5989	property_type	external-identifier
+P5990	property_type	external-identifier
+P5991	property_type	quantity
+P5992	property_type	quantity
+P5993	property_type	quantity
+P5994	property_type	string
+P5995	property_type	item
+P5996	property_type	item
+P5997	property_type	string
+P5998	property_type	item
+P5999	property_type	external-identifier
+P6000	property_type	quantity
+P6001	property_type	item
+P6002	property_type	external-identifier
+P6003	property_type	external-identifier
+P6004	property_type	external-identifier
+P6005	property_type	external-identifier
+P6006	property_type	external-identifier
+P6007	property_type	external-identifier
+P6008	property_type	external-identifier
+P6009	property_type	external-identifier
+P6010	property_type	external-identifier
+P6011	property_type	external-identifier
+P6012	property_type	external-identifier
+P6013	property_type	external-identifier
+P6014	property_type	quantity
+P6015	property_type	external-identifier
+P6016	property_type	external-identifier
+P6017	property_type	external-identifier
+P1795	property_type	external-identifier
+P1796	property_type	string
+P1798	property_type	external-identifier
+P1799	property_type	external-identifier
+P1800	property_type	external-identifier
+P1801	property_type	string
+P1802	property_type	external-identifier
+P1803	property_type	external-identifier
+P1804	property_type	external-identifier
+P1806	property_type	external-identifier
+P1807	property_type	external-identifier
+P1808	property_type	external-identifier
+P1809	property_type	item
+P1810	property_type	string
+P1811	property_type	item
+P1813	property_type	monolingualtext
+P1814	property_type	string
+P1815	property_type	string
+P1816	property_type	external-identifier
+P1817	property_type	item
+P1818	property_type	external-identifier
+P1819	property_type	external-identifier
+P1820	property_type	string
+P1821	property_type	external-identifier
+P1822	property_type	external-identifier
+P1823	property_type	external-identifier
+P1824	property_type	string
+P1825	property_type	external-identifier
+P1826	property_type	external-identifier
+P1827	property_type	external-identifier
+P1828	property_type	external-identifier
+P1829	property_type	external-identifier
+P1830	property_type	item
+P1831	property_type	quantity
+P1832	property_type	external-identifier
+P1833	property_type	quantity
+P1836	property_type	string
+P1837	property_type	external-identifier
+P1838	property_type	external-identifier
+P1839	property_type	external-identifier
+P1840	property_type	item
+P1841	property_type	external-identifier
+P1842	property_type	external-identifier
+P1843	property_type	monolingualtext
+P1844	property_type	external-identifier
+P1845	property_type	string
+P1846	property_type	string
+P1847	property_type	external-identifier
+P1848	property_type	external-identifier
+P1849	property_type	external-identifier
+P1850	property_type	external-identifier
+P1851	property_type	item
+P1852	property_type	external-identifier
+P1853	property_type	item
+P1854	property_type	external-identifier
+P1855	property_type	item
+P1866	property_type	external-identifier
+P1867	property_type	quantity
+P1868	property_type	quantity
+P1869	property_type	external-identifier
+P1870	property_type	external-identifier
+P1871	property_type	external-identifier
+P1872	property_type	quantity
+P1873	property_type	quantity
+P1874	property_type	external-identifier
+P1875	property_type	item
+P1876	property_type	item
+P1877	property_type	item
+P1878	property_type	item
+P1879	property_type	item
+P1880	property_type	item
+P1881	property_type	item
+P1882	property_type	external-identifier
+P1883	property_type	external-identifier
+P1884	property_type	item
+P1885	property_type	item
+P1886	property_type	external-identifier
+P1887	property_type	item
+P1888	property_type	external-identifier
+P1889	property_type	item
+P1890	property_type	external-identifier
+P1891	property_type	item
+P1893	property_type	external-identifier
+P1894	property_type	external-identifier
+P1895	property_type	external-identifier
+P1896	property_type	url
+P1897	property_type	item
+P1898	property_type	item
+P1899	property_type	external-identifier
+P1900	property_type	external-identifier
+P1901	property_type	external-identifier
+P1902	property_type	external-identifier
+P1903	property_type	item
+P1906	property_type	item
+P1907	property_type	external-identifier
+P1908	property_type	external-identifier
+P1909	property_type	item
+P1910	property_type	item
+P1911	property_type	item
+P1912	property_type	item
+P7587	property_type	external-identifier
+P7588	property_type	time
+P7589	property_type	time
+P7590	property_type	external-identifier
+P7591	property_type	external-identifier
+P7592	property_type	external-identifier
+P7593	property_type	external-identifier
+P7594	property_type	external-identifier
+P7595	property_type	external-identifier
+P7596	property_type	external-identifier
+P7597	property_type	external-identifier
+P7598	property_type	string
+P7599	property_type	string
+P7600	property_type	item
+P7601	property_type	item
+P7602	property_type	external-identifier
+P7603	property_type	item
+P7604	property_type	item
+P7605	property_type	string
+P7606	property_type	external-identifier
+P7607	property_type	external-identifier
+P7608	property_type	external-identifier
+P7609	property_type	external-identifier
+P7610	property_type	external-identifier
+P7611	property_type	external-identifier
+P7612	property_type	external-identifier
+P7613	property_type	external-identifier
+P7614	property_type	external-identifier
+P7615	property_type	external-identifier
+P7616	property_type	external-identifier
+P7617	property_type	external-identifier
+P7618	property_type	external-identifier
+P7619	property_type	external-identifier
+P7620	property_type	external-identifier
+P7621	property_type	external-identifier
+P7622	property_type	external-identifier
+P7623	property_type	external-identifier
+P7624	property_type	external-identifier
+P7625	property_type	external-identifier
+P7626	property_type	external-identifier
+P7627	property_type	external-identifier
+P7630	property_type	external-identifier
+P7631	property_type	external-identifier
+P7632	property_type	external-identifier
+P7633	property_type	external-identifier
+P7634	property_type	external-identifier
+P7635	property_type	external-identifier
+P7636	property_type	external-identifier
+P7637	property_type	external-identifier
+P7638	property_type	external-identifier
+P7639	property_type	external-identifier
+P7641	property_type	external-identifier
+P7642	property_type	external-identifier
+P7643	property_type	item
+P7644	property_type	external-identifier
+P7645	property_type	external-identifier
+P7646	property_type	external-identifier
+P7647	property_type	external-identifier
+P7648	property_type	external-identifier
+P7649	property_type	external-identifier
+P7650	property_type	external-identifier
+P7651	property_type	external-identifier
+P7652	property_type	external-identifier
+P7653	property_type	external-identifier
+P7654	property_type	external-identifier
+P7655	property_type	external-identifier
+P7656	property_type	external-identifier
+P7657	property_type	external-identifier
+P7658	property_type	external-identifier
+P7659	property_type	external-identifier
+P7660	property_type	external-identifier
+P7661	property_type	external-identifier
+P7662	property_type	external-identifier
+P7663	property_type	external-identifier
+P7665	property_type	external-identifier
+P7666	property_type	external-identifier
+P7667	property_type	external-identifier
+P7668	property_type	quantity
+P7669	property_type	external-identifier
+P7670	property_type	external-identifier
+P7671	property_type	external-identifier
+P7672	property_type	external-identifier
+P7673	property_type	external-identifier
+P7674	property_type	external-identifier
+P7675	property_type	external-identifier
+P7676	property_type	external-identifier
+P7677	property_type	external-identifier
+P7678	property_type	external-identifier
+P7679	property_type	external-identifier
+P7680	property_type	external-identifier
+P7681	property_type	external-identifier
+P7682	property_type	external-identifier
+P7683	property_type	external-identifier
+P7684	property_type	external-identifier
+P7685	property_type	external-identifier
+P7686	property_type	external-identifier
+P7687	property_type	external-identifier
+P7688	property_type	external-identifier
+P7689	property_type	external-identifier
+P7690	property_type	external-identifier
+P6542	property_type	external-identifier
+P6543	property_type	quantity
+P6544	property_type	quantity
+P6545	property_type	quantity
+P6546	property_type	quantity
+P6547	property_type	quantity
+P6548	property_type	external-identifier
+P6549	property_type	external-identifier
+P6550	property_type	external-identifier
+P6551	property_type	external-identifier
+P6552	property_type	external-identifier
+P6553	property_type	string
+P6554	property_type	external-identifier
+P6555	property_type	external-identifier
+P6556	property_type	external-identifier
+P6557	property_type	external-identifier
+P6558	property_type	external-identifier
+P6559	property_type	external-identifier
+P6560	property_type	external-identifier
+P6561	property_type	external-identifier
+P6562	property_type	external-identifier
+P6563	property_type	item
+P6564	property_type	external-identifier
+P6565	property_type	external-identifier
+P6566	property_type	external-identifier
+P6567	property_type	external-identifier
+P6568	property_type	item
+P6569	property_type	item
+P6570	property_type	quantity
+P6571	property_type	string
+P6572	property_type	external-identifier
+P6573	property_type	external-identifier
+P6574	property_type	external-identifier
+P6575	property_type	external-identifier
+P6576	property_type	external-identifier
+P6577	property_type	external-identifier
+P6578	property_type	external-identifier
+P6579	property_type	external-identifier
+P6580	property_type	external-identifier
+P6581	property_type	external-identifier
+P6582	property_type	external-identifier
+P6583	property_type	external-identifier
+P6584	property_type	external-identifier
+P6585	property_type	external-identifier
+P6586	property_type	string
+P6587	property_type	item
+P6589	property_type	quantity
+P6590	property_type	quantity
+P6591	property_type	quantity
+P6592	property_type	string
+P6593	property_type	string
+P6594	property_type	external-identifier
+P6595	property_type	external-identifier
+P6596	property_type	external-identifier
+P6597	property_type	external-identifier
+P6598	property_type	external-identifier
+P6599	property_type	external-identifier
+P6600	property_type	external-identifier
+P6601	property_type	external-identifier
+P6602	property_type	external-identifier
+P6603	property_type	external-identifier
+P6604	property_type	string
+P6605	property_type	external-identifier
+P6606	property_type	item
+P6607	property_type	monolingualtext
+P6608	property_type	item
+P6609	property_type	string
+P6610	property_type	external-identifier
+P6611	property_type	external-identifier
+P6612	property_type	external-identifier
+P6613	property_type	external-identifier
+P6614	property_type	external-identifier
+P6615	property_type	external-identifier
+P6616	property_type	external-identifier
+P6617	property_type	external-identifier
+P6618	property_type	external-identifier
+P6619	property_type	external-identifier
+P6620	property_type	external-identifier
+P6621	property_type	external-identifier
+P6622	property_type	external-identifier
+P6623	property_type	external-identifier
+P6624	property_type	external-identifier
+P6625	property_type	external-identifier
+P6626	property_type	external-identifier
+P6627	property_type	external-identifier
+P6628	property_type	external-identifier
+P6629	property_type	external-identifier
+P6630	property_type	external-identifier
+P6631	property_type	external-identifier
+P6632	property_type	external-identifier
+P6633	property_type	external-identifier
+P6634	property_type	external-identifier
+P6635	property_type	external-identifier
+P6636	property_type	external-identifier
+P6637	property_type	external-identifier
+P6639	property_type	quantity
+P6640	property_type	external-identifier
+P6641	property_type	external-identifier
+P6643	property_type	external-identifier
+P6644	property_type	external-identifier
+P7792	property_type	external-identifier
+P7793	property_type	string
+P7794	property_type	external-identifier
+P7795	property_type	external-identifier
+P7796	property_type	external-identifier
+P7797	property_type	external-identifier
+P7798	property_type	external-identifier
+P7799	property_type	external-identifier
+P7800	property_type	external-identifier
+P7801	property_type	external-identifier
+P7802	property_type	external-identifier
+P7803	property_type	external-identifier
+P7804	property_type	external-identifier
+P7805	property_type	external-identifier
+P7806	property_type	external-identifier
+P7807	property_type	external-identifier
+P7808	property_type	external-identifier
+P7809	property_type	external-identifier
+P7810	property_type	external-identifier
+P7811	property_type	external-identifier
+P7812	property_type	external-identifier
+P7813	property_type	external-identifier
+P7814	property_type	external-identifier
+P7815	property_type	external-identifier
+P7816	property_type	external-identifier
+P7817	property_type	external-identifier
+P7818	property_type	external-identifier
+P7819	property_type	external-identifier
+P7820	property_type	external-identifier
+P7821	property_type	external-identifier
+P7822	property_type	external-identifier
+P7823	property_type	external-identifier
+P7824	property_type	external-identifier
+P7825	property_type	external-identifier
+P7826	property_type	external-identifier
+P7827	property_type	external-identifier
+P7828	property_type	external-identifier
+P7829	property_type	external-identifier
+P7830	property_type	external-identifier
+P7831	property_type	external-identifier
+P7832	property_type	external-identifier
+P7834	property_type	external-identifier
+P7835	property_type	external-identifier
+P7836	property_type	external-identifier
+P7837	property_type	external-identifier
+P7838	property_type	external-identifier
+P7839	property_type	external-identifier
+P7840	property_type	external-identifier
+P7841	property_type	external-identifier
+P7842	property_type	external-identifier
+P7843	property_type	external-identifier
+P7844	property_type	external-identifier
+P7845	property_type	external-identifier
+P7846	property_type	external-identifier
+P7847	property_type	external-identifier
+P7848	property_type	external-identifier
+P7849	property_type	external-identifier
+P7850	property_type	external-identifier
+P7851	property_type	external-identifier
+P7852	property_type	external-identifier
+P7853	property_type	external-identifier
+P7854	property_type	external-identifier
+P7855	property_type	string
+P7856	property_type	external-identifier
+P7857	property_type	external-identifier
+P7858	property_type	external-identifier
+P7859	property_type	external-identifier
+P7860	property_type	external-identifier
+P7861	property_type	item
+P7862	property_type	quantity
+P7863	property_type	quantity
+P7864	property_type	external-identifier
+P7865	property_type	external-identifier
+P7866	property_type	external-identifier
+P7867	property_type	item
+P7868	property_type	external-identifier
+P7869	property_type	external-identifier
+P7870	property_type	external-identifier
+P7871	property_type	external-identifier
+P7872	property_type	external-identifier
+P7873	property_type	external-identifier
+P7874	property_type	external-identifier
+P7875	property_type	external-identifier
+P7876	property_type	external-identifier
+P7877	property_type	external-identifier
+P7878	property_type	external-identifier
+P7879	property_type	external-identifier
+P7880	property_type	external-identifier
+P7881	property_type	external-identifier
+P7882	property_type	external-identifier
+P7883	property_type	external-identifier
+P7884	property_type	external-identifier
+P7885	property_type	external-identifier
+P7886	property_type	external-identifier
+P7887	property_type	quantity
+P7888	property_type	item
+P7889	property_type	external-identifier
+P7890	property_type	url
+P7891	property_type	external-identifier
+P7892	property_type	external-identifier
+P6018	property_type	external-identifier
+P6019	property_type	external-identifier
+P6020	property_type	external-identifier
+P6021	property_type	external-identifier
+P6022	property_type	item
+P6023	property_type	external-identifier
+P6024	property_type	external-identifier
+P6025	property_type	external-identifier
+P6028	property_type	external-identifier
+P6030	property_type	external-identifier
+P6032	property_type	external-identifier
+P6033	property_type	external-identifier
+P6034	property_type	external-identifier
+P6035	property_type	external-identifier
+P6036	property_type	external-identifier
+P6037	property_type	external-identifier
+P6038	property_type	external-identifier
+P6039	property_type	external-identifier
+P6040	property_type	external-identifier
+P6041	property_type	external-identifier
+P6042	property_type	external-identifier
+P6043	property_type	external-identifier
+P6044	property_type	external-identifier
+P6045	property_type	external-identifier
+P6046	property_type	external-identifier
+P6047	property_type	external-identifier
+P6048	property_type	external-identifier
+P6049	property_type	external-identifier
+P6050	property_type	external-identifier
+P6051	property_type	external-identifier
+P6052	property_type	external-identifier
+P6053	property_type	external-identifier
+P6054	property_type	external-identifier
+P6055	property_type	external-identifier
+P6056	property_type	external-identifier
+P6057	property_type	external-identifier
+P6058	property_type	external-identifier
+P6059	property_type	external-identifier
+P6060	property_type	external-identifier
+P6061	property_type	external-identifier
+P6062	property_type	external-identifier
+P6063	property_type	external-identifier
+P6064	property_type	external-identifier
+P6065	property_type	external-identifier
+P6066	property_type	external-identifier
+P6067	property_type	external-identifier
+P6068	property_type	external-identifier
+P6069	property_type	quantity
+P6070	property_type	external-identifier
+P6071	property_type	external-identifier
+P6072	property_type	string
+P6073	property_type	quantity
+P6075	property_type	quantity
+P6076	property_type	quantity
+P6077	property_type	external-identifier
+P6078	property_type	external-identifier
+P6079	property_type	external-identifier
+P6080	property_type	external-identifier
+P6081	property_type	external-identifier
+P6082	property_type	external-identifier
+P6083	property_type	external-identifier
+P6084	property_type	item
+P6086	property_type	item
+P6087	property_type	item
+P6088	property_type	quantity
+P6089	property_type	quantity
+P6090	property_type	external-identifier
+P6091	property_type	external-identifier
+P6092	property_type	external-identifier
+P6093	property_type	external-identifier
+P6094	property_type	external-identifier
+P6095	property_type	item
+P6096	property_type	external-identifier
+P6097	property_type	external-identifier
+P6098	property_type	external-identifier
+P6099	property_type	item
+P6100	property_type	external-identifier
+P6101	property_type	external-identifier
+P6102	property_type	external-identifier
+P6103	property_type	external-identifier
+P6104	property_type	item
+P6105	property_type	external-identifier
+P6106	property_type	item
+P6107	property_type	url
+P6108	property_type	url
+P6109	property_type	external-identifier
+P6110	property_type	external-identifier
+P6111	property_type	external-identifier
+P6112	property_type	item
+P6113	property_type	external-identifier
+P6114	property_type	external-identifier
+P6115	property_type	external-identifier
+P6116	property_type	item
+P6117	property_type	external-identifier
+P6118	property_type	item
+P6119	property_type	external-identifier
+P6120	property_type	external-identifier
+P6122	property_type	external-identifier
+P6123	property_type	external-identifier
+P6124	property_type	external-identifier
+P7691	property_type	external-identifier
+P7692	property_type	external-identifier
+P7693	property_type	external-identifier
+P7694	property_type	external-identifier
+P7695	property_type	external-identifier
+P7696	property_type	external-identifier
+P7697	property_type	external-identifier
+P7698	property_type	external-identifier
+P7699	property_type	external-identifier
+P7700	property_type	external-identifier
+P7701	property_type	external-identifier
+P7702	property_type	external-identifier
+P7703	property_type	external-identifier
+P7704	property_type	external-identifier
+P7705	property_type	url
+P7706	property_type	string
+P7707	property_type	url
+P7708	property_type	external-identifier
+P7709	property_type	external-identifier
+P7710	property_type	external-identifier
+P7711	property_type	external-identifier
+P7712	property_type	external-identifier
+P7713	property_type	external-identifier
+P7714	property_type	external-identifier
+P7715	property_type	external-identifier
+P7716	property_type	external-identifier
+P7717	property_type	external-identifier
+P7718	property_type	external-identifier
+P7719	property_type	item
+P7720	property_type	external-identifier
+P7721	property_type	external-identifier
+P7722	property_type	external-identifier
+P7723	property_type	external-identifier
+P7724	property_type	external-identifier
+P7725	property_type	quantity
+P7726	property_type	external-identifier
+P7727	property_type	item
+P7729	property_type	external-identifier
+P7730	property_type	external-identifier
+P7731	property_type	external-identifier
+P7732	property_type	external-identifier
+P7733	property_type	external-identifier
+P7734	property_type	external-identifier
+P7735	property_type	external-identifier
+P7736	property_type	external-identifier
+P7737	property_type	external-identifier
+P7738	property_type	external-identifier
+P7739	property_type	external-identifier
+P7740	property_type	external-identifier
+P7741	property_type	external-identifier
+P7742	property_type	external-identifier
+P7743	property_type	external-identifier
+P7744	property_type	external-identifier
+P7745	property_type	external-identifier
+P7746	property_type	external-identifier
+P7747	property_type	external-identifier
+P7748	property_type	external-identifier
+P7749	property_type	external-identifier
+P7750	property_type	external-identifier
+P7751	property_type	external-identifier
+P7752	property_type	external-identifier
+P7753	property_type	external-identifier
+P7754	property_type	external-identifier
+P7755	property_type	external-identifier
+P7756	property_type	external-identifier
+P7757	property_type	external-identifier
+P7758	property_type	external-identifier
+P7759	property_type	external-identifier
+P7760	property_type	external-identifier
+P7761	property_type	external-identifier
+P7762	property_type	external-identifier
+P7763	property_type	item
+P7764	property_type	external-identifier
+P7765	property_type	external-identifier
+P7766	property_type	external-identifier
+P7767	property_type	item
+P7768	property_type	external-identifier
+P7769	property_type	external-identifier
+P7770	property_type	quantity
+P7771	property_type	external-identifier
+P7772	property_type	external-identifier
+P7773	property_type	external-identifier
+P7774	property_type	external-identifier
+P7775	property_type	external-identifier
+P7776	property_type	external-identifier
+P7777	property_type	external-identifier
+P7778	property_type	external-identifier
+P7779	property_type	item
+P7780	property_type	external-identifier
+P7781	property_type	item
+P7782	property_type	item
+P7783	property_type	external-identifier
+P7784	property_type	external-identifier
+P7785	property_type	external-identifier
+P7786	property_type	external-identifier
+P7787	property_type	quantity
+P7788	property_type	external-identifier
+P7789	property_type	external-identifier
+P7790	property_type	external-identifier
+P7791	property_type	external-identifier
+P7893	property_type	external-identifier
+P7894	property_type	external-identifier
+P7895	property_type	external-identifier
+P7896	property_type	external-identifier
+P7897	property_type	external-identifier
+P7898	property_type	external-identifier
+P7899	property_type	external-identifier
+P7900	property_type	external-identifier
+P7901	property_type	external-identifier
+P7902	property_type	external-identifier
+P7903	property_type	item
+P7904	property_type	item
+P7905	property_type	external-identifier
+P7906	property_type	external-identifier
+P7907	property_type	external-identifier
+P7908	property_type	external-identifier
+P7909	property_type	external-identifier
+P7910	property_type	external-identifier
+P7911	property_type	external-identifier
+P7912	property_type	external-identifier
+P7913	property_type	external-identifier
+P7914	property_type	external-identifier
+P7915	property_type	external-identifier
+P7916	property_type	external-identifier
+P7917	property_type	external-identifier
+P7918	property_type	external-identifier
+P7919	property_type	external-identifier
+P7920	property_type	external-identifier
+P7921	property_type	external-identifier
+P7922	property_type	external-identifier
+P7923	property_type	external-identifier
+P7924	property_type	external-identifier
+P7925	property_type	external-identifier
+P7926	property_type	external-identifier
+P7927	property_type	external-identifier
+P7928	property_type	external-identifier
+P7929	property_type	external-identifier
+P7930	property_type	url
+P7931	property_type	external-identifier
+P7932	property_type	external-identifier
+P7934	property_type	external-identifier
+P7935	property_type	external-identifier
+P7936	property_type	item
+P7937	property_type	item
+P7938	property_type	item
+P7939	property_type	external-identifier
+P7940	property_type	external-identifier
+P7941	property_type	external-identifier
+P7942	property_type	external-identifier
+P7943	property_type	external-identifier
+P7944	property_type	external-identifier
+P7945	property_type	external-identifier
+P7946	property_type	external-identifier
+P7947	property_type	external-identifier
+P7948	property_type	external-identifier
+P7949	property_type	external-identifier
+P7950	property_type	external-identifier
+P7951	property_type	external-identifier
+P7952	property_type	external-identifier
+P7953	property_type	external-identifier
+P7954	property_type	external-identifier
+P7955	property_type	external-identifier
+P7956	property_type	external-identifier
+P7957	property_type	external-identifier
+P7958	property_type	external-identifier
+P7959	property_type	item
+P7960	property_type	external-identifier
+P7961	property_type	external-identifier
+P7962	property_type	external-identifier
+P7963	property_type	external-identifier
+P7964	property_type	string
+P7965	property_type	external-identifier
+P7966	property_type	external-identifier
+P7967	property_type	external-identifier
+P7968	property_type	external-identifier
+P7969	property_type	external-identifier
+P7970	property_type	external-identifier
+P7971	property_type	quantity
+P7972	property_type	external-identifier
+P7973	property_type	string
+P7974	property_type	external-identifier
+P7975	property_type	external-identifier
+P7976	property_type	external-identifier
+P7977	property_type	external-identifier
+P7978	property_type	external-identifier
+P7979	property_type	external-identifier
+P7980	property_type	external-identifier
+P7981	property_type	external-identifier
+P7982	property_type	external-identifier
+P7983	property_type	external-identifier
+P7984	property_type	item
+P7985	property_type	external-identifier
+P7986	property_type	external-identifier
+P7987	property_type	external-identifier
+P7988	property_type	external-identifier
+P7989	property_type	external-identifier
+P7990	property_type	external-identifier
+P7991	property_type	external-identifier
+P7992	property_type	external-identifier
+P7993	property_type	external-identifier
+P6125	property_type	quantity
+P6126	property_type	external-identifier
+P6127	property_type	external-identifier
+P6128	property_type	external-identifier
+P6130	property_type	external-identifier
+P6131	property_type	external-identifier
+P6132	property_type	external-identifier
+P6133	property_type	external-identifier
+P6134	property_type	external-identifier
+P6135	property_type	external-identifier
+P6136	property_type	external-identifier
+P6137	property_type	external-identifier
+P6138	property_type	external-identifier
+P6139	property_type	external-identifier
+P6140	property_type	external-identifier
+P6141	property_type	external-identifier
+P6142	property_type	external-identifier
+P6143	property_type	external-identifier
+P6144	property_type	external-identifier
+P6145	property_type	external-identifier
+P6146	property_type	external-identifier
+P6147	property_type	external-identifier
+P6148	property_type	external-identifier
+P6149	property_type	item
+P6150	property_type	external-identifier
+P6151	property_type	external-identifier
+P6152	property_type	external-identifier
+P6153	property_type	item
+P6154	property_type	external-identifier
+P6155	property_type	external-identifier
+P6156	property_type	external-identifier
+P6157	property_type	external-identifier
+P6158	property_type	external-identifier
+P6159	property_type	external-identifier
+P6160	property_type	external-identifier
+P6161	property_type	external-identifier
+P6162	property_type	external-identifier
+P6163	property_type	external-identifier
+P6164	property_type	external-identifier
+P6165	property_type	external-identifier
+P6166	property_type	item
+P6167	property_type	external-identifier
+P6168	property_type	external-identifier
+P6169	property_type	external-identifier
+P6170	property_type	external-identifier
+P6171	property_type	external-identifier
+P6172	property_type	external-identifier
+P6173	property_type	external-identifier
+P6174	property_type	external-identifier
+P6175	property_type	external-identifier
+P6176	property_type	external-identifier
+P6177	property_type	external-identifier
+P6178	property_type	external-identifier
+P6179	property_type	external-identifier
+P6180	property_type	external-identifier
+P6181	property_type	external-identifier
+P6182	property_type	external-identifier
+P6183	property_type	external-identifier
+P6184	property_type	item
+P6185	property_type	item
+P6186	property_type	item
+P6187	property_type	external-identifier
+P6188	property_type	external-identifier
+P6189	property_type	external-identifier
+P6190	property_type	external-identifier
+P6191	property_type	item
+P6192	property_type	external-identifier
+P6193	property_type	item
+P6194	property_type	external-identifier
+P6195	property_type	item
+P6196	property_type	external-identifier
+P6197	property_type	external-identifier
+P6198	property_type	external-identifier
+P6199	property_type	external-identifier
+P6200	property_type	external-identifier
+P6201	property_type	external-identifier
+P6202	property_type	external-identifier
+P6204	property_type	external-identifier
+P6205	property_type	external-identifier
+P6206	property_type	external-identifier
+P6208	property_type	monolingualtext
+P6209	property_type	external-identifier
+P6210	property_type	external-identifier
+P6211	property_type	external-identifier
+P6212	property_type	item
+P6213	property_type	external-identifier
+P6214	property_type	external-identifier
+P6215	property_type	external-identifier
+P6216	property_type	item
+P6217	property_type	external-identifier
+P6218	property_type	external-identifier
+P6219	property_type	external-identifier
+P6220	property_type	external-identifier
+P6221	property_type	external-identifier
+P6222	property_type	external-identifier
+P6223	property_type	external-identifier
+P6224	property_type	item
+P6225	property_type	external-identifier
+P6226	property_type	external-identifier
+P6227	property_type	external-identifier
+P6645	property_type	external-identifier
+P6646	property_type
external-identifier +P6647 property_type external-identifier +P6648 property_type string +P6649 property_type external-identifier +P6650 property_type external-identifier +P6652 property_type external-identifier +P6653 property_type external-identifier +P6654 property_type external-identifier +P6655 property_type string +P6656 property_type external-identifier +P6657 property_type item +P6658 property_type item +P6659 property_type external-identifier +P6660 property_type external-identifier +P6661 property_type external-identifier +P6662 property_type external-identifier +P6663 property_type external-identifier +P6664 property_type external-identifier +P6665 property_type external-identifier +P6666 property_type external-identifier +P6667 property_type external-identifier +P6668 property_type external-identifier +P6669 property_type external-identifier +P6670 property_type string +P6671 property_type external-identifier +P6672 property_type external-identifier +P6673 property_type external-identifier +P6674 property_type external-identifier +P6676 property_type external-identifier +P6677 property_type external-identifier +P6678 property_type external-identifier +P6679 property_type external-identifier +P6680 property_type external-identifier +P6681 property_type external-identifier +P6682 property_type external-identifier +P6683 property_type external-identifier +P6684 property_type item +P6685 property_type string +P6686 property_type string +P6687 property_type external-identifier +P6688 property_type external-identifier +P6689 property_type external-identifier +P6690 property_type external-identifier +P6691 property_type external-identifier +P6692 property_type external-identifier +P6693 property_type external-identifier +P6694 property_type external-identifier +P6695 property_type quantity +P6696 property_type external-identifier +P6697 property_type quantity +P6698 property_type external-identifier +P6699 property_type external-identifier +P6700 property_type external-identifier +P6701 property_type external-identifier +P6702 property_type external-identifier +P6703 property_type external-identifier +P6704 property_type external-identifier +P6705 property_type external-identifier +P6706 property_type external-identifier +P6707 property_type quantity +P6708 property_type quantity +P6709 property_type string +P6710 property_type quantity +P6711 property_type quantity +P6712 property_type string +P6713 property_type external-identifier +P6714 property_type external-identifier +P6715 property_type external-identifier +P6716 property_type external-identifier +P6717 property_type external-identifier +P6718 property_type item +P6719 property_type string +P6720 property_type external-identifier +P6721 property_type external-identifier +P6722 property_type external-identifier +P6723 property_type external-identifier +P6724 property_type external-identifier +P6725 property_type external-identifier +P6726 property_type external-identifier +P6727 property_type external-identifier +P6728 property_type external-identifier +P6729 property_type external-identifier +P6730 property_type external-identifier +P6731 property_type item +P6732 property_type external-identifier +P6733 property_type string +P6734 property_type external-identifier +P6735 property_type external-identifier +P6736 property_type external-identifier +P6737 property_type external-identifier +P6738 property_type external-identifier +P6739 property_type external-identifier +P6740 property_type external-identifier +P6741 property_type 
external-identifier +P6742 property_type external-identifier +P6743 property_type external-identifier +P6744 property_type external-identifier +P6745 property_type external-identifier +P6746 property_type external-identifier +P7994 property_type external-identifier +P7995 property_type external-identifier +P7996 property_type external-identifier +P7997 property_type external-identifier +P7998 property_type external-identifier +P7999 property_type external-identifier +P8000 property_type string +P8001 property_type item +P8002 property_type external-identifier +P8003 property_type external-identifier +P8004 property_type item +P8005 property_type item +P8006 property_type item +P8007 property_type external-identifier +P8008 property_type external-identifier +P8009 property_type string +P8010 property_type quantity +P8011 property_type quantity +P8012 property_type external-identifier +P8013 property_type external-identifier +P8014 property_type external-identifier +P8015 property_type external-identifier +P8016 property_type external-identifier +P8017 property_type string +P8018 property_type external-identifier +P8019 property_type external-identifier +P8020 property_type external-identifier +P8021 property_type external-identifier +P8022 property_type external-identifier +P8023 property_type external-identifier +P8024 property_type external-identifier +P8025 property_type external-identifier +P8026 property_type item +P8027 property_type external-identifier +P8028 property_type external-identifier +P8029 property_type external-identifier +P8030 property_type item +P8031 property_type item +P8032 property_type item +P8033 property_type external-identifier +P8034 property_type external-identifier +P8035 property_type external-identifier +P8036 property_type external-identifier +P8037 property_type external-identifier +P8038 property_type external-identifier +P8039 property_type external-identifier +P8040 property_type external-identifier +P8041 property_type external-identifier +P8042 property_type external-identifier +P8043 property_type external-identifier +P8044 property_type external-identifier +P8045 property_type item +P8046 property_type string +P8047 property_type item +P8048 property_type external-identifier +P8049 property_type quantity +P8050 property_type external-identifier +P8051 property_type external-identifier +P8052 property_type external-identifier +P8053 property_type external-identifier +P8054 property_type string +P8055 property_type external-identifier +P8056 property_type external-identifier +P8057 property_type external-identifier +P8058 property_type item +P8059 property_type external-identifier +P8060 property_type external-identifier +P8061 property_type external-identifier +P8062 property_type external-identifier +P8063 property_type external-identifier +P8064 property_type external-identifier +P8065 property_type external-identifier +P8066 property_type external-identifier +P8067 property_type external-identifier +P8068 property_type external-identifier +P8069 property_type external-identifier +P8070 property_type external-identifier +P8071 property_type external-identifier +P8072 property_type external-identifier +P8073 property_type external-identifier +P8074 property_type external-identifier +P8075 property_type external-identifier +P8076 property_type external-identifier +P8077 property_type external-identifier +P8078 property_type external-identifier +P8079 property_type external-identifier +P8080 property_type external-identifier +P8081 property_type 
external-identifier +P8082 property_type external-identifier +P8083 property_type external-identifier +P8084 property_type external-identifier +P8085 property_type external-identifier +P8086 property_type external-identifier +P8087 property_type external-identifier +P8088 property_type external-identifier +P8089 property_type external-identifier +P8090 property_type external-identifier +P8091 property_type external-identifier +P8092 property_type external-identifier +P8093 property_type quantity +P8094 property_type external-identifier +P8095 property_type external-identifier +P8096 property_type external-identifier +P8097 property_type item +P8098 property_type external-identifier +P8099 property_type external-identifier +P8100 property_type external-identifier +P8101 property_type external-identifier +P8102 property_type external-identifier +P8103 property_type external-identifier +P8104 property_type external-identifier +P8105 property_type external-identifier +P8106 property_type external-identifier +P8107 property_type item +P8108 property_type external-identifier +P8109 property_type external-identifier +P8110 property_type external-identifier +P8111 property_type item +P8112 property_type string +P8113 property_type external-identifier +P8114 property_type external-identifier +P8115 property_type item +P8116 property_type external-identifier +P8117 property_type external-identifier +P8118 property_type external-identifier +P8119 property_type external-identifier +P8120 property_type external-identifier +P8121 property_type external-identifier +P8122 property_type external-identifier +P8123 property_type external-identifier +P8124 property_type external-identifier +P8125 property_type external-identifier +P8126 property_type external-identifier +P8127 property_type item +P8128 property_type external-identifier +P8129 property_type external-identifier +P8130 property_type external-identifier +P8131 property_type item +P8132 property_type external-identifier +P6228 property_type external-identifier +P6229 property_type external-identifier +P6230 property_type external-identifier +P6231 property_type external-identifier +P6232 property_type external-identifier +P6233 property_type external-identifier +P6234 property_type external-identifier +P6235 property_type external-identifier +P6237 property_type item +P6238 property_type external-identifier +P6239 property_type external-identifier +P6240 property_type external-identifier +P6241 property_type item +P6242 property_type external-identifier +P6243 property_type item +P6244 property_type external-identifier +P6245 property_type external-identifier +P6246 property_type external-identifier +P6247 property_type external-identifier +P6248 property_type external-identifier +P6249 property_type quantity +P6250 property_type external-identifier +P6251 property_type monolingualtext +P6252 property_type external-identifier +P6253 property_type external-identifier +P6254 property_type string +P6255 property_type external-identifier +P6256 property_type external-identifier +P6257 property_type quantity +P6258 property_type quantity +P6259 property_type item +P6260 property_type quantity +P6261 property_type quantity +P6262 property_type external-identifier +P6263 property_type external-identifier +P6264 property_type external-identifier +P6265 property_type external-identifier +P6266 property_type external-identifier +P6267 property_type external-identifier +P6268 property_type external-identifier +P6269 property_type url +P6271 property_type 
item +P6272 property_type quantity +P6274 property_type quantity +P6275 property_type item +P6276 property_type external-identifier +P6277 property_type external-identifier +P6278 property_type external-identifier +P6279 property_type external-identifier +P6280 property_type string +P6281 property_type external-identifier +P6282 property_type external-identifier +P6283 property_type external-identifier +P6284 property_type external-identifier +P6285 property_type external-identifier +P6286 property_type external-identifier +P6287 property_type external-identifier +P6288 property_type external-identifier +P6289 property_type external-identifier +P6290 property_type external-identifier +P6291 property_type item +P6292 property_type external-identifier +P6293 property_type external-identifier +P6294 property_type external-identifier +P6295 property_type external-identifier +P6296 property_type external-identifier +P6297 property_type external-identifier +P6298 property_type external-identifier +P6299 property_type external-identifier +P6300 property_type external-identifier +P6301 property_type external-identifier +P6302 property_type external-identifier +P6303 property_type external-identifier +P6304 property_type external-identifier +P6305 property_type external-identifier +P6306 property_type external-identifier +P6307 property_type external-identifier +P6308 property_type external-identifier +P6309 property_type external-identifier +P6310 property_type external-identifier +P6311 property_type external-identifier +P6312 property_type external-identifier +P6313 property_type external-identifier +P6314 property_type external-identifier +P6315 property_type external-identifier +P6316 property_type external-identifier +P6317 property_type external-identifier +P6318 property_type external-identifier +P6319 property_type external-identifier +P6320 property_type external-identifier +P6321 property_type external-identifier +P6322 property_type external-identifier +P6323 property_type external-identifier +P6324 property_type external-identifier +P6325 property_type external-identifier +P6326 property_type external-identifier +P6327 property_type external-identifier +P6328 property_type external-identifier +P6329 property_type external-identifier +P6330 property_type external-identifier +P6747 property_type external-identifier +P6748 property_type external-identifier +P6749 property_type external-identifier +P6750 property_type external-identifier +P6751 property_type external-identifier +P6752 property_type external-identifier +P6753 property_type quantity +P6754 property_type external-identifier +P6756 property_type external-identifier +P6757 property_type quantity +P6758 property_type item +P6759 property_type external-identifier +P6760 property_type external-identifier +P6761 property_type external-identifier +P6762 property_type external-identifier +P6763 property_type external-identifier +P6764 property_type external-identifier +P6765 property_type external-identifier +P6766 property_type external-identifier +P6767 property_type external-identifier +P6768 property_type external-identifier +P6769 property_type external-identifier +P6770 property_type external-identifier +P6771 property_type external-identifier +P6772 property_type external-identifier +P6773 property_type external-identifier +P6774 property_type external-identifier +P6775 property_type external-identifier +P6776 property_type external-identifier +P6777 property_type external-identifier +P6778 property_type external-identifier 
+P6780 property_type external-identifier +P6781 property_type external-identifier +P6782 property_type external-identifier +P6783 property_type external-identifier +P6784 property_type external-identifier +P6785 property_type external-identifier +P6786 property_type external-identifier +P6787 property_type external-identifier +P6788 property_type external-identifier +P6789 property_type quantity +P6790 property_type quantity +P6791 property_type external-identifier +P6792 property_type external-identifier +P6793 property_type string +P6794 property_type quantity +P6795 property_type external-identifier +P6796 property_type external-identifier +P6797 property_type external-identifier +P6798 property_type string +P6799 property_type external-identifier +P6800 property_type url +P6801 property_type quantity +P6802 property_type string +P6803 property_type item +P6804 property_type external-identifier +P6805 property_type external-identifier +P6806 property_type external-identifier +P6807 property_type external-identifier +P6808 property_type external-identifier +P6809 property_type external-identifier +P6810 property_type external-identifier +P6811 property_type external-identifier +P6812 property_type external-identifier +P6813 property_type external-identifier +P6814 property_type external-identifier +P6815 property_type external-identifier +P6816 property_type external-identifier +P6817 property_type external-identifier +P6818 property_type url +P6819 property_type item +P6820 property_type external-identifier +P6821 property_type external-identifier +P6822 property_type external-identifier +P6823 property_type external-identifier +P6824 property_type string +P6825 property_type external-identifier +P6826 property_type quantity +P6827 property_type external-identifier +P6828 property_type external-identifier +P6829 property_type external-identifier +P6830 property_type external-identifier +P6831 property_type external-identifier +P6832 property_type external-identifier +P6833 property_type monolingualtext +P6835 property_type string +P6836 property_type external-identifier +P6837 property_type external-identifier +P6838 property_type external-identifier +P6839 property_type external-identifier +P6840 property_type item +P6841 property_type external-identifier +P6842 property_type external-identifier +P6843 property_type external-identifier +P6844 property_type external-identifier +P6845 property_type external-identifier +P6846 property_type external-identifier +P6847 property_type external-identifier +P6848 property_type external-identifier +P6849 property_type external-identifier +P6331 property_type external-identifier +P6332 property_type external-identifier +P6333 property_type monolingualtext +P6334 property_type external-identifier +P6335 property_type external-identifier +P6336 property_type external-identifier +P6337 property_type external-identifier +P6338 property_type item +P6339 property_type item +P6340 property_type external-identifier +P6341 property_type external-identifier +P6342 property_type external-identifier +P6343 property_type quantity +P6344 property_type quantity +P6346 property_type monolingualtext +P6347 property_type external-identifier +P6348 property_type external-identifier +P6349 property_type external-identifier +P6350 property_type external-identifier +P6351 property_type external-identifier +P6352 property_type external-identifier +P6353 property_type external-identifier +P6354 property_type quantity +P6355 property_type external-identifier +P6356 
property_type external-identifier +P6357 property_type external-identifier +P6358 property_type external-identifier +P6359 property_type external-identifier +P6360 property_type external-identifier +P6361 property_type external-identifier +P6362 property_type external-identifier +P6363 property_type url +P6364 property_type item +P6365 property_type item +P6366 property_type external-identifier +P6367 property_type external-identifier +P6368 property_type external-identifier +P6369 property_type external-identifier +P6370 property_type external-identifier +P6371 property_type external-identifier +P6372 property_type external-identifier +P6373 property_type external-identifier +P6374 property_type external-identifier +P6375 property_type monolingualtext +P6376 property_type external-identifier +P6377 property_type external-identifier +P6378 property_type url +P6379 property_type item +P6381 property_type external-identifier +P6382 property_type external-identifier +P6383 property_type external-identifier +P6384 property_type external-identifier +P6385 property_type external-identifier +P6386 property_type external-identifier +P6387 property_type external-identifier +P6388 property_type external-identifier +P6389 property_type external-identifier +P6390 property_type external-identifier +P6391 property_type external-identifier +P6392 property_type external-identifier +P6394 property_type external-identifier +P6395 property_type external-identifier +P6398 property_type external-identifier +P6399 property_type external-identifier +P6400 property_type external-identifier +P6401 property_type external-identifier +P6402 property_type external-identifier +P6403 property_type external-identifier +P6404 property_type external-identifier +P6405 property_type external-identifier +P6406 property_type external-identifier +P6407 property_type external-identifier +P6408 property_type external-identifier +P6409 property_type external-identifier +P6410 property_type external-identifier +P6411 property_type external-identifier +P6412 property_type external-identifier +P6413 property_type external-identifier +P6414 property_type external-identifier +P6415 property_type external-identifier +P6416 property_type external-identifier +P6417 property_type external-identifier +P6418 property_type external-identifier +P6419 property_type external-identifier +P6420 property_type external-identifier +P6421 property_type external-identifier +P6422 property_type external-identifier +P6423 property_type external-identifier +P6424 property_type string +P6425 property_type external-identifier +P6426 property_type item +P6427 property_type monolingualtext +P6428 property_type external-identifier +P6429 property_type external-identifier +P6430 property_type external-identifier +P6431 property_type external-identifier +P6432 property_type string +P6433 property_type external-identifier +P6434 property_type external-identifier +P6436 property_type external-identifier +P6850 property_type external-identifier +P6851 property_type external-identifier +P6852 property_type external-identifier +P6853 property_type external-identifier +P6854 property_type external-identifier +P6855 property_type item +P6856 property_type quantity +P6857 property_type external-identifier +P6858 property_type external-identifier +P6859 property_type external-identifier +P6861 property_type external-identifier +P6862 property_type external-identifier +P6863 property_type external-identifier +P6864 property_type external-identifier +P6865 property_type 
external-identifier +P6866 property_type external-identifier +P6867 property_type external-identifier +P6868 property_type external-identifier +P6869 property_type external-identifier +P6870 property_type external-identifier +P6871 property_type external-identifier +P6872 property_type item +P6873 property_type external-identifier +P6874 property_type external-identifier +P6875 property_type item +P6876 property_type quantity +P6877 property_type external-identifier +P6878 property_type external-identifier +P6879 property_type quantity +P6880 property_type external-identifier +P6881 property_type external-identifier +P6882 property_type external-identifier +P6883 property_type string +P6884 property_type item +P6885 property_type item +P6886 property_type item +P6887 property_type item +P6888 property_type external-identifier +P6889 property_type item +P6890 property_type external-identifier +P6891 property_type external-identifier +P6892 property_type external-identifier +P6893 property_type external-identifier +P6894 property_type external-identifier +P6895 property_type external-identifier +P6896 property_type external-identifier +P6897 property_type quantity +P6898 property_type external-identifier +P6899 property_type external-identifier +P6900 property_type external-identifier +P6901 property_type external-identifier +P6902 property_type item +P6903 property_type external-identifier +P6904 property_type external-identifier +P6905 property_type external-identifier +P6906 property_type external-identifier +P6907 property_type external-identifier +P6908 property_type external-identifier +P6909 property_type external-identifier +P6910 property_type external-identifier +P6911 property_type external-identifier +P6912 property_type external-identifier +P6913 property_type external-identifier +P6914 property_type external-identifier +P6915 property_type external-identifier +P6916 property_type external-identifier +P6917 property_type external-identifier +P6918 property_type external-identifier +P6919 property_type external-identifier +P6920 property_type external-identifier +P6921 property_type external-identifier +P6922 property_type external-identifier +P6923 property_type external-identifier +P6924 property_type external-identifier +P6925 property_type external-identifier +P6926 property_type external-identifier +P6927 property_type external-identifier +P6928 property_type external-identifier +P6929 property_type external-identifier +P6930 property_type external-identifier +P6931 property_type external-identifier +P6932 property_type external-identifier +P6933 property_type external-identifier +P6934 property_type external-identifier +P6935 property_type external-identifier +P6936 property_type external-identifier +P6937 property_type external-identifier +P6938 property_type item +P6939 property_type item +P6940 property_type external-identifier +P6941 property_type external-identifier +P6942 property_type item +P6943 property_type external-identifier +P6944 property_type external-identifier +P6945 property_type external-identifier +P6946 property_type external-identifier +P6947 property_type external-identifier +P6948 property_type item +P6949 property_type time +P6950 property_type external-identifier +P6953 property_type external-identifier +P6954 property_type item +P6955 property_type external-identifier +P6956 property_type external-identifier +P6957 property_type external-identifier +P6958 property_type external-identifier +P6959 property_type external-identifier +P6960 
property_type external-identifier +P6962 property_type item +P6963 property_type external-identifier +P6964 property_type external-identifier +P6965 property_type external-identifier +P6966 property_type external-identifier +P6967 property_type external-identifier +P6968 property_type external-identifier +P6969 property_type external-identifier +P6970 property_type external-identifier +P6971 property_type external-identifier +P6972 property_type external-identifier +P6973 property_type external-identifier +P6975 property_type external-identifier +P6976 property_type external-identifier +P6977 property_type item +P6978 property_type item +P6979 property_type external-identifier +P6980 property_type external-identifier +P6981 property_type external-identifier +P6982 property_type external-identifier +P6983 property_type external-identifier +P6984 property_type external-identifier +P6985 property_type external-identifier +P6987 property_type external-identifier +P6988 property_type external-identifier +P6989 property_type external-identifier +P6992 property_type external-identifier +P6993 property_type external-identifier +P6994 property_type external-identifier +P6995 property_type external-identifier +P6996 property_type external-identifier +P6997 property_type external-identifier +P6998 property_type external-identifier +P6999 property_type external-identifier +P7000 property_type external-identifier +P7001 property_type external-identifier +P7002 property_type external-identifier +P7003 property_type external-identifier +P7004 property_type external-identifier +P7005 property_type external-identifier +P7006 property_type external-identifier +P7007 property_type external-identifier +P7008 property_type monolingualtext +P7009 property_type string +P7010 property_type item +P7011 property_type external-identifier +P7012 property_type external-identifier +P7013 property_type external-identifier +P7014 property_type url +P7015 property_type quantity +P7017 property_type external-identifier +P7018 property_type string +P7019 property_type external-identifier +P7020 property_type external-identifier +P7021 property_type external-identifier +P7022 property_type external-identifier +P7023 property_type external-identifier +P7024 property_type external-identifier +P7025 property_type external-identifier +P7026 property_type external-identifier +P7027 property_type external-identifier +P7028 property_type external-identifier +P7029 property_type external-identifier +P7030 property_type external-identifier +P7031 property_type external-identifier +P7032 property_type external-identifier +P7033 property_type external-identifier +P7034 property_type external-identifier +P7035 property_type external-identifier +P7036 property_type external-identifier +P7037 property_type external-identifier +P7038 property_type external-identifier +P7039 property_type external-identifier +P7040 property_type external-identifier +P7041 property_type external-identifier +P7042 property_type external-identifier +P7043 property_type external-identifier +P7044 property_type external-identifier +P7045 property_type item +P7046 property_type external-identifier +P7047 property_type item +P7048 property_type external-identifier +P7049 property_type external-identifier +P7050 property_type external-identifier +P7051 property_type external-identifier +P7052 property_type external-identifier +P7053 property_type external-identifier +P7054 property_type external-identifier +P7055 property_type external-identifier +P7056 
property_type external-identifier +P7057 property_type external-identifier +P7058 property_type external-identifier +P7059 property_type external-identifier +P7060 property_type external-identifier +P7063 property_type external-identifier +P7064 property_type external-identifier +P7065 property_type external-identifier +P7066 property_type external-identifier +P7067 property_type external-identifier +P7068 property_type external-identifier +P7069 property_type string +P7070 property_type external-identifier +P7071 property_type external-identifier +P7072 property_type external-identifier +P7073 property_type external-identifier +P7074 property_type external-identifier +P7075 property_type item +P7076 property_type external-identifier +P7077 property_type external-identifier +P7078 property_type item +P7079 property_type quantity +P7080 property_type quantity +P7081 property_type monolingualtext +P7083 property_type quantity +P7084 property_type item +P7085 property_type external-identifier +P7086 property_type item +P7087 property_type item +P7089 property_type external-identifier +P7090 property_type external-identifier +P7091 property_type external-identifier +P7092 property_type external-identifier +P7093 property_type external-identifier +P7094 property_type external-identifier +P7095 property_type item +P7100 property_type external-identifier +P7101 property_type url +P7102 property_type external-identifier +P7103 property_type time +P7104 property_type time +P7105 property_type external-identifier +P7106 property_type external-identifier +P7107 property_type external-identifier +P7108 property_type item +P7109 property_type external-identifier +P7110 property_type external-identifier +P7111 property_type external-identifier +P7112 property_type external-identifier +P7113 property_type external-identifier +P7114 property_type external-identifier +P7115 property_type external-identifier +P7116 property_type external-identifier +P7117 property_type external-identifier +P7118 property_type external-identifier +P7119 property_type external-identifier +P7120 property_type external-identifier +P7121 property_type external-identifier +P7122 property_type item +P7124 property_type time +P7125 property_type time +P7126 property_type string +P7127 property_type external-identifier +P7128 property_type external-identifier +P7129 property_type external-identifier +P7130 property_type external-identifier +P7131 property_type external-identifier +P7132 property_type external-identifier +P7133 property_type external-identifier +P7134 property_type external-identifier +P7135 property_type external-identifier +P7136 property_type external-identifier +P7137 property_type item +P7138 property_type external-identifier +P7139 property_type external-identifier +P7140 property_type external-identifier +P7141 property_type string +P7142 property_type external-identifier +P7143 property_type external-identifier +P7144 property_type external-identifier +P7145 property_type external-identifier +P7146 property_type external-identifier +P7148 property_type external-identifier +P7149 property_type external-identifier +P7150 property_type monolingualtext +P7151 property_type external-identifier +P7152 property_type item +P7153 property_type item +P7154 property_type external-identifier +P7155 property_type external-identifier +P7156 property_type external-identifier +P7157 property_type external-identifier +P7159 property_type external-identifier +P7160 property_type item +P7161 property_type external-identifier 
+P7162 property_type item +P7163 property_type item +P7164 property_type external-identifier +P7165 property_type item +P7166 property_type external-identifier +P7167 property_type item +P7168 property_type external-identifier +P7169 property_type item +P7170 property_type external-identifier +P7171 property_type external-identifier +P7172 property_type external-identifier +P7173 property_type external-identifier +P7174 property_type item +P7175 property_type external-identifier +P7176 property_type external-identifier +P7177 property_type external-identifier +P7178 property_type external-identifier +P7179 property_type external-identifier +P7180 property_type external-identifier +P7181 property_type external-identifier +P7182 property_type external-identifier +P7183 property_type external-identifier +P7184 property_type external-identifier +P7185 property_type external-identifier +P7186 property_type external-identifier +P7187 property_type external-identifier +P7188 property_type external-identifier +P7189 property_type external-identifier +P7190 property_type external-identifier +P7191 property_type external-identifier +P7192 property_type external-identifier +P7193 property_type external-identifier +P7194 property_type external-identifier +P7195 property_type external-identifier +P7196 property_type external-identifier +P7197 property_type external-identifier +P7198 property_type external-identifier +P7199 property_type external-identifier +P7200 property_type external-identifier +P7201 property_type external-identifier +P7202 property_type external-identifier +P7203 property_type external-identifier +P7204 property_type external-identifier +P7205 property_type external-identifier +P7206 property_type external-identifier +P7207 property_type external-identifier +P7208 property_type external-identifier +P7209 property_type item +P7210 property_type external-identifier +P7211 property_type external-identifier +P7212 property_type external-identifier +P7213 property_type url +P7214 property_type external-identifier +P7215 property_type external-identifier +P7216 property_type external-identifier +P7217 property_type external-identifier +P7218 property_type external-identifier +P7219 property_type string +P7220 property_type string +P7221 property_type string +P7222 property_type external-identifier +P7223 property_type external-identifier +P7224 property_type external-identifier +P7225 property_type external-identifier +P7226 property_type external-identifier +P7227 property_type external-identifier +P7228 property_type item +P7229 property_type external-identifier +P7230 property_type external-identifier +P7231 property_type external-identifier +P7232 property_type external-identifier +P7233 property_type external-identifier +P7234 property_type external-identifier +P7235 property_type string +P7236 property_type external-identifier +P7237 property_type external-identifier +P7238 property_type external-identifier +P7241 property_type external-identifier +P7242 property_type external-identifier +P7243 property_type monolingualtext +P7250 property_type string +P7251 property_type external-identifier +P7252 property_type item +P7253 property_type item +P7254 property_type external-identifier +P7255 property_type external-identifier +P7256 property_type quantity +P7257 property_type external-identifier +P7258 property_type external-identifier +P7259 property_type external-identifier +P7260 property_type external-identifier +P7261 property_type item +P7262 property_type external-identifier 
+P7263 property_type external-identifier +P7264 property_type external-identifier +P7265 property_type external-identifier +P7266 property_type external-identifier +P7267 property_type external-identifier +P7268 property_type external-identifier +P7269 property_type external-identifier +P7270 property_type external-identifier +P7271 property_type external-identifier +P7272 property_type external-identifier +P7273 property_type external-identifier +P7274 property_type external-identifier +P7275 property_type external-identifier +P7276 property_type external-identifier +P7277 property_type external-identifier +P7278 property_type external-identifier +P7279 property_type external-identifier +P7280 property_type external-identifier +P7281 property_type external-identifier +P7282 property_type external-identifier +P7283 property_type external-identifier +P7284 property_type external-identifier +P7285 property_type external-identifier +P7286 property_type external-identifier +P7287 property_type external-identifier +P7288 property_type external-identifier +P7289 property_type external-identifier +P7290 property_type string +P7291 property_type external-identifier +P7292 property_type external-identifier +P7293 property_type external-identifier +P7294 property_type url +P7295 property_type time +P7296 property_type external-identifier +P7297 property_type quantity +P7298 property_type external-identifier +P7299 property_type external-identifier +P7300 property_type external-identifier +P7301 property_type external-identifier +P7302 property_type external-identifier +P7303 property_type external-identifier +P7304 property_type external-identifier +P7305 property_type external-identifier +P7306 property_type external-identifier +P7307 property_type external-identifier +P7308 property_type external-identifier +P7309 property_type item +P7310 property_type external-identifier +P7311 property_type external-identifier +P7312 property_type external-identifier +P7313 property_type external-identifier +P7314 property_type external-identifier +P7315 property_type string +P7316 property_type quantity +P7317 property_type external-identifier +P7318 property_type external-identifier +P7319 property_type external-identifier +P7320 property_type external-identifier +P7321 property_type external-identifier +P7322 property_type external-identifier +P7323 property_type external-identifier +P7324 property_type external-identifier +P7325 property_type external-identifier +P7326 property_type external-identifier +P7327 property_type item +P7328 property_type quantity +P7329 property_type external-identifier +P7330 property_type string +P7331 property_type external-identifier +P7332 property_type external-identifier +P7333 property_type external-identifier +P7334 property_type external-identifier +P7335 property_type external-identifier +P7336 property_type external-identifier +P7337 property_type external-identifier +P7338 property_type string +P7339 property_type external-identifier +P7340 property_type external-identifier +P7341 property_type external-identifier +P7342 property_type external-identifier +P7343 property_type external-identifier +P7344 property_type external-identifier +P7345 property_type external-identifier +P7346 property_type external-identifier +P7347 property_type url +P7348 property_type external-identifier +P7349 property_type external-identifier +P7350 property_type external-identifier +P7351 property_type external-identifier +P7352 property_type external-identifier +P7353 property_type 
external-identifier +P7354 property_type external-identifier +P7355 property_type external-identifier +P7356 property_type external-identifier +P7357 property_type external-identifier +P7358 property_type external-identifier +P7359 property_type external-identifier +P7360 property_type external-identifier +P7361 property_type external-identifier +P7362 property_type external-identifier +P7363 property_type external-identifier +P7364 property_type external-identifier +P7365 property_type external-identifier +P7366 property_type external-identifier +P7367 property_type item +P7368 property_type external-identifier +P7369 property_type external-identifier +P7370 property_type external-identifier +P7371 property_type external-identifier +P7372 property_type external-identifier +P7374 property_type item +P7375 property_type url +P7376 property_type item +P7377 property_type item +P7378 property_type item +P7379 property_type quantity +P7380 property_type string +P7381 property_type external-identifier +P7382 property_type external-identifier +P7383 property_type string +P7384 property_type external-identifier +P7387 property_type external-identifier +P7388 property_type external-identifier +P7389 property_type external-identifier +P7390 property_type external-identifier +P7391 property_type quantity +P7395 property_type external-identifier +P7396 property_type external-identifier +P7397 property_type external-identifier +P7398 property_type external-identifier +P7399 property_type external-identifier +P7400 property_type external-identifier +P7401 property_type external-identifier +P7402 property_type external-identifier +P7403 property_type external-identifier +P7404 property_type external-identifier +P7405 property_type external-identifier +P7406 property_type item +P7407 property_type string +P7408 property_type external-identifier +P7409 property_type external-identifier +P7410 property_type external-identifier +P7411 property_type external-identifier +P7412 property_type external-identifier +P7413 property_type external-identifier +P7414 property_type external-identifier +P7415 property_type string +P7416 property_type string +P7417 property_type string +P7418 property_type string +P7419 property_type item +P7420 property_type string +P7421 property_type string +P7422 property_type quantity +P7423 property_type external-identifier +P7425 property_type external-identifier +P7427 property_type external-identifier +P7428 property_type external-identifier +P7429 property_type external-identifier +P7430 property_type external-identifier +P7431 property_type external-identifier +P7432 property_type external-identifier +P7433 property_type external-identifier +P7434 property_type external-identifier +P7435 property_type external-identifier +P7436 property_type external-identifier +P7437 property_type external-identifier +P7438 property_type external-identifier +P7439 property_type external-identifier +P7440 property_type external-identifier +P7441 property_type external-identifier +P7442 property_type item +P7443 property_type quantity +P7444 property_type external-identifier +P7445 property_type external-identifier +P7446 property_type external-identifier +P7447 property_type external-identifier +P7448 property_type external-identifier +P7449 property_type external-identifier +P7450 property_type external-identifier +P7451 property_type external-identifier +P7452 property_type item +P7453 property_type external-identifier +P7454 property_type external-identifier +P7455 property_type 
+P7485 property_type external-identifier
\ No newline at end of file
diff --git a/kgtk/tests/test_triple_generation.py b/kgtk/tests/test_triple_generation.py
new file mode 100644
index 000000000..2ab9c6fcb
--- /dev/null
+++ b/kgtk/tests/test_triple_generation.py
@@ -0,0 +1,97 @@
+import unittest
+from kgtk.triple_generator import TripleGenerator
+from pathlib import Path
+
+
+class TestTripleGeneration(unittest.TestCase):
+
+    def test_truthy_property_triple_generation(self):
+        property_tsv_file = 'data/P10.tsv'
+        wikidata_property_file = 'data/wikidata_properties.tsv'
+        o = open('data/P10_truthy_tmp.ttl', 'w')
+        generator = TripleGenerator(wikidata_property_file, label_set='label', alias_set='aliases',
+                                    description_set='descriptions', ignore=True, n=100, truthy=True, use_id=True,
+                                    dest_fp=o)
+        for line_num, edge in enumerate(open(property_tsv_file)):
+            if edge.startswith("#"):
+                continue
+            else:
+                generator.entry_point(line_num + 1, edge)
+        generator.finalize()
+        o.close()
+        f1 = open('data/P10_truthy.ttl')
+        f2 = open('data/P10_truthy_tmp.ttl')
+        self.assertEqual(f1.readlines(), f2.readlines())
+        f1.close()
+        f2.close()
+        p = Path('data/P10_truthy_tmp.ttl')
+        p.unlink()
+
+    def test_property_triple_generation(self):
+        property_tsv_file = 'data/P10.tsv'
+        wikidata_property_file = 'data/wikidata_properties.tsv'
+        o = open('data/P10_not_truthy_tmp.ttl', 'w')
+        generator = TripleGenerator(wikidata_property_file, label_set='label', alias_set='aliases',
+                                    description_set='descriptions', ignore=True, n=100, truthy=False, use_id=True,
+                                    dest_fp=o)
+        for line_num, edge in enumerate(open(property_tsv_file)):
+            if edge.startswith("#"):
+                continue
+            else:
+                generator.entry_point(line_num + 1, edge)
+        generator.finalize()
+        o.close()
+        f1 = open('data/P10_not_truthy.ttl')
+        f2 = open('data/P10_not_truthy_tmp.ttl')
+        self.assertEqual(f1.readlines(), f2.readlines())
+        f1.close()
+        f2.close()
+        p = Path('data/P10_not_truthy_tmp.ttl')
+        p.unlink()
+
+    def test_truthy_qnode_triple_generation(self):
+        qnode_tsv_file = 'data/Q57160439.tsv'
+        wikidata_property_file = 'data/wikidata_properties.tsv'
+        o = open('data/Q57160439_truthy_tmp.ttl', 'w')
+        generator = TripleGenerator(wikidata_property_file, label_set='label', alias_set='aliases',
+                                    description_set='descriptions', ignore=True, n=100, truthy=True, use_id=True,
+                                    dest_fp=o)
+        for line_num, edge in enumerate(open(qnode_tsv_file)):
+            if edge.startswith("#"):
+                continue
+            else:
+                generator.entry_point(line_num + 1, edge)
+        generator.finalize()
+
+        o.close()
+
+        f1 = open('data/Q57160439_truthy.ttl')
+        f2 = open('data/Q57160439_truthy_tmp.ttl')
+        self.assertEqual(f1.readlines(), f2.readlines())
+        f1.close()
+        f2.close()
+        p = Path('data/Q57160439_truthy_tmp.ttl')
+        p.unlink()
+
+    def test_not_truthy_qnode_triple_generation(self):
+        qnode_tsv_file = 'data/Q57160439.tsv'
+        wikidata_property_file = 'data/wikidata_properties.tsv'
+        o = open('data/Q57160439_not_truthy_tmp.ttl', 'w')
+        generator = TripleGenerator(wikidata_property_file, label_set='label', alias_set='aliases',
+                                    description_set='descriptions', ignore=True, n=100, truthy=False, use_id=True,
+                                    dest_fp=o)
+        for line_num, edge in enumerate(open(qnode_tsv_file)):
+            if edge.startswith("#"):
+                continue
+            else:
+                generator.entry_point(line_num + 1, edge)
+        generator.finalize()
+
+        o.close()
+        f1 = open('data/Q57160439_not_truthy.ttl')
+        f2 = open('data/Q57160439_not_truthy_tmp.ttl')
+        self.assertEqual(f1.readlines(), f2.readlines())
+        f1.close()
+        f2.close()
+        p = Path('data/Q57160439_not_truthy_tmp.ttl')
+        p.unlink()
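The four tests above are one golden-file pattern repeated: generate triples for a fixture into a temporary TTL, compare it line-for-line against a checked-in expected TTL, then delete the temporary file. A hypothetical helper method on TestTripleGeneration (illustrative only, not part of this patch; the name assert_ttl_matches is invented) would make that shape explicit:

    # hypothetical: the shared golden-file pattern above, factored out (not in the patch)
    def assert_ttl_matches(self, input_tsv, golden_ttl, tmp_ttl, truthy):
        with open(tmp_ttl, 'w') as o:
            generator = TripleGenerator('data/wikidata_properties.tsv', label_set='label',
                                        alias_set='aliases', description_set='descriptions',
                                        ignore=True, n=100, truthy=truthy, use_id=True, dest_fp=o)
            for line_num, edge in enumerate(open(input_tsv)):
                if not edge.startswith("#"):       # skip comment lines, as the tests do
                    generator.entry_point(line_num + 1, edge)
            generator.finalize()                   # flush while dest_fp is still open
        with open(golden_ttl) as f1, open(tmp_ttl) as f2:
            self.assertEqual(f1.readlines(), f2.readlines())
        Path(tmp_ttl).unlink()                     # remove the temporary output

Each test would then reduce to a single call, e.g. self.assert_ttl_matches('data/P10.tsv', 'data/P10_truthy.ttl', 'data/P10_truthy_tmp.ttl', truthy=True).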

From 32be6affdf243ebd947758992e32b9bba7856f88 Mon Sep 17 00:00:00 2001
From: saggu
Date: Wed, 13 May 2020 13:37:53 -0700
Subject: [PATCH 186/278] unit test for small values

---
 kgtk/tests/data/small_values.tsv     |  6 ++
 kgtk/tests/data/small_values.ttl     | 90 ++++++++++++++++++++++++++++
 kgtk/tests/test_triple_generation.py | 24 ++++++++
 3 files changed, 120 insertions(+)
 create mode 100644 kgtk/tests/data/small_values.tsv
 create mode 100644 kgtk/tests/data/small_values.ttl

diff --git a/kgtk/tests/data/small_values.tsv b/kgtk/tests/data/small_values.tsv
new file mode 100644
index 000000000..e012fa736
--- /dev/null
+++ b/kgtk/tests/data/small_values.tsv
@@ -0,0 +1,6 @@
+node1	property	node2	id
+Q00005550-chemical-MESHC000006	P6897	0	Q00005550-chemical-MESHC000006-P2020013-18300
+Q00005550-chemical-MESHC000006	P7015	7	Q00005550-chemical-MESHC000006-P2020014-18301
+Q00005550-chemical-MESHC000006	P7079	1.9860001065575846e-07	Q00005550-chemical-MESHC000006-P2020015-18302
+Q00005550-chemical-MESHC000006	P7080	0.0004846436908691038	Q00005550-chemical-MESHC000006-P2020016-18304
+Q00005550-chemical-MESHC000006	P7083	0.0	Q00005550-chemical-MESHC000006-P2020017-18303
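The node2 column deliberately mixes an integer, zero, and sub-micro floats, and any reader of this fixture should recover the exact doubles. A stand-alone sketch (plain csv module, not KGTK's own reader; the path matches the fixture above and tab delimiters are assumed):

    # sketch: read the fixture back and parse node2 as floats
    import csv

    with open('kgtk/tests/data/small_values.tsv') as f:
        for row in csv.DictReader(f, delimiter='\t'):
            print(row['property'], float(row['node2']))  # e.g. P7079 1.986e-07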
diff --git a/kgtk/tests/data/small_values.ttl b/kgtk/tests/data/small_values.ttl
new file mode 100644
index 000000000..890ecc620
--- /dev/null
+++ b/kgtk/tests/data/small_values.ttl
@@ -0,0 +1,90 @@
+@prefix wikibase: <http://wikiba.se/ontology#> .
+@prefix wd: <http://www.wikidata.org/entity/> .
+@prefix wdt: <http://www.wikidata.org/prop/direct/> .
+@prefix wdtn: <http://www.wikidata.org/prop/direct-normalized/> .
+@prefix wdno: <http://www.wikidata.org/prop/novalue/> .
+@prefix wds: <http://www.wikidata.org/entity/statement/> .
+@prefix wdv: <http://www.wikidata.org/value/> .
+@prefix wdref: <http://www.wikidata.org/reference/> .
+@prefix p: <http://www.wikidata.org/prop/> .
+@prefix pr: <http://www.wikidata.org/prop/reference/> .
+@prefix prv: <http://www.wikidata.org/prop/reference/value/> .
+@prefix prn: <http://www.wikidata.org/prop/reference/value-normalized/> .
+@prefix ps: <http://www.wikidata.org/prop/statement/> .
+@prefix psv: <http://www.wikidata.org/prop/statement/value/> .
+@prefix psn: <http://www.wikidata.org/prop/statement/value-normalized/> .
+@prefix pq: <http://www.wikidata.org/prop/qualifier/> .
+@prefix pqv: <http://www.wikidata.org/prop/qualifier/value/> .
+@prefix pqn: <http://www.wikidata.org/prop/qualifier/value-normalized/> .
+@prefix prov: <http://www.w3.org/ns/prov#> .
+@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
+@prefix schema: <http://schema.org/> .
+
+wd:Q00005550-chemical-MESHC000006 a wikibase:Item ;
+    p:P6897 wds:Q00005550-chemical-MESHC000006-Q00005550-chemical-MESHC000006-P2020013-18300 ;
+    p:P7015 wds:Q00005550-chemical-MESHC000006-Q00005550-chemical-MESHC000006-P2020014-18301 ;
+    p:P7079 wds:Q00005550-chemical-MESHC000006-Q00005550-chemical-MESHC000006-P2020015-18302 ;
+    p:P7080 wds:Q00005550-chemical-MESHC000006-Q00005550-chemical-MESHC000006-P2020016-18304 ;
+    p:P7083 wds:Q00005550-chemical-MESHC000006-Q00005550-chemical-MESHC000006-P2020017-18303 ;
+    wdtn:P6897 wdv:Quantityc0c0c0c0 ;
+    wdtn:P7015 wdv:Quantityc7c0c0c0 ;
+    wdtn:P7079 wdv:Quantityc0-00000019860001065575846c0c0c0 ;
+    wdtn:P7080 wdv:Quantityc0-0004846436908691038c0c0c0 ;
+    wdtn:P7083 wdv:Quantityc0c0c0c0 ;
+    wdt:P6897 0.0 ;
+    wdt:P7015 7.0 ;
+    wdt:P7079 1.9860001065575846E-7 ;
+    wdt:P7080 0.0004846436908691038 ;
+    wdt:P7083 0.0 .
+
+wds:Q00005550-chemical-MESHC000006-Q00005550-chemical-MESHC000006-P2020013-18300 a wikibase:Statement ;
+    wikibase:rank wikibase:BestRank ;
+    ;
+    ps:P6897 0.0 ;
+    psn:P6897 wdv:Quantityc0c0c0c0 ;
+    psv:P6897 wdv:Quantityc0c0c0c0 .
+
+wds:Q00005550-chemical-MESHC000006-Q00005550-chemical-MESHC000006-P2020014-18301 a wikibase:Statement ;
+    wikibase:rank wikibase:BestRank ;
+    ;
+    ps:P7015 7.0 ;
+    psn:P7015 wdv:Quantityc7c0c0c0 ;
+    psv:P7015 wdv:Quantityc7c0c0c0 .
+
+wds:Q00005550-chemical-MESHC000006-Q00005550-chemical-MESHC000006-P2020015-18302 a wikibase:Statement ;
+    wikibase:rank wikibase:BestRank ;
+    ;
+    ps:P7079 1.9860001065575846E-7 ;
+    psn:P7079 wdv:Quantityc0-00000019860001065575846c0c0c0 ;
+    psv:P7079 wdv:Quantityc0-00000019860001065575846c0c0c0 .
+
+wds:Q00005550-chemical-MESHC000006-Q00005550-chemical-MESHC000006-P2020016-18304 a wikibase:Statement ;
+    wikibase:rank wikibase:BestRank ;
+    ;
+    ps:P7080 0.0004846436908691038 ;
+    psn:P7080 wdv:Quantityc0-0004846436908691038c0c0c0 ;
+    psv:P7080 wdv:Quantityc0-0004846436908691038c0c0c0 .
+
+wds:Q00005550-chemical-MESHC000006-Q00005550-chemical-MESHC000006-P2020017-18303 a wikibase:Statement ;
+    wikibase:rank wikibase:BestRank ;
+    ;
+    ps:P7083 0.0 ;
+    psn:P7083 wdv:Quantityc0c0c0c0 ;
+    psv:P7083 wdv:Quantityc0c0c0c0 .
+
+wdv:Quantityc0-00000019860001065575846c0c0c0 a wikibase:QuantityValue ;
+    wikibase:quantityAmount 1.9860001065575846E-7 ;
+    wikibase:quantityNormalized wdv:Quantityc0-00000019860001065575846c0c0c0 .
+
+wdv:Quantityc0-0004846436908691038c0c0c0 a wikibase:QuantityValue ;
+    wikibase:quantityAmount 0.0004846436908691038 ;
+    wikibase:quantityNormalized wdv:Quantityc0-0004846436908691038c0c0c0 .
+
+wdv:Quantityc7c0c0c0 a wikibase:QuantityValue ;
+    wikibase:quantityAmount 7.0 ;
+    wikibase:quantityNormalized wdv:Quantityc7c0c0c0 .
+
+wdv:Quantityc0c0c0c0 a wikibase:QuantityValue ;
+    wikibase:quantityAmount 0.0 ;
+    wikibase:quantityNormalized wdv:Quantityc0c0c0c0 .
+
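Note the lexical shift the golden file pins down: the fixture writes 1.9860001065575846e-07 while the Turtle above writes 1.9860001065575846E-7. Both denote the same IEEE-754 double, and the test added below compares output byte-for-byte, so the generator must reproduce exactly the second form. A quick sanity check (plain Python, illustrative only):

    # sketch: TSV and Turtle lexical forms parse to the same double
    assert float('1.9860001065575846e-07') == float('1.9860001065575846E-7')
    assert float('0.0004846436908691038') == 4.846436908691038e-04
    print('small-value literals denote identical doubles')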
+ diff --git a/kgtk/tests/test_triple_generation.py b/kgtk/tests/test_triple_generation.py index 2ab9c6fcb..91cc7e432 100644 --- a/kgtk/tests/test_triple_generation.py +++ b/kgtk/tests/test_triple_generation.py @@ -95,3 +95,27 @@ def test_not_truthy_qnode_triple_generation(self): f2.close() p = Path('data/Q57160439_not_truthy_tmp.ttl') p.unlink() + + def test_triple_small_values(self): + small_values_file = 'data/small_values.tsv' + wikidata_property_file = 'data/wikidata_properties.tsv' + o = open('data/small_values_tmp.ttl', 'w') + generator = TripleGenerator(wikidata_property_file, label_set='label', alias_set='aliases', + description_set='descriptions', ignore=True, n=100, truthy=True, use_id=True, + dest_fp=o) + for line_num, edge in enumerate(open(small_values_file)): + if edge.startswith("#"): + continue + else: + generator.entry_point(line_num + 1, edge) + generator.finalize() + + o.close() + + f1 = open('data/small_values.ttl') + f2 = open('data/small_values_tmp.ttl') + self.assertEqual(f1.readlines(), f2.readlines()) + f1.close() + f2.close() + p = Path('data/small_values_tmp.ttl') + p.unlink() \ No newline at end of file From 056538b6a30e30cef6ea15124b7d13af8ed08437 Mon Sep 17 00:00:00 2001 From: saggu Date: Wed, 13 May 2020 13:51:47 -0700 Subject: [PATCH 187/278] add travis ci --- .travis.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000..f1b9f1c01 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,16 @@ +sudo: false +dist: trusty +language: python +python: +- 3.7 +install: +- sudo apt-get update +- pip install --upgrade pip +- pip install -r requirements.txt +script: +- python -m spacy download en_core_web_sm +- cd kgtk/tests +- python -m unittest discover +notifications: + slack: + secure: FfFhdBv7FgVTZrA/UUm3EcsH/dOvyOusIJ0o+Y+Ysf9DVeasBjJ2E7xaCMcs0KM8ypQuXTVMJyT88uGiJ3fNU/Sy5C/TEireGOWCqy84et/iFjMfIIHnPb3Nz6yLIrDsrrEufcLm1RDeMtQkvn55FfuLOoelfKe10/eAfR/luscoCr1LLqFxGZizpkYION9FCTlZ1CX+OK13ALuG9hqeCKy+k/PkmuwboQDW1N0Q7JcJTs90Pr02TZp83efePRmayXSjjhiy3npVsBYP/oQPyec1mgCSizn+lkTJJ80yzHe++e7zzpg5XbyLjSoA/ddz8AdRq5wD+BooVegJB0cxnMioEzHvpocIyUC28vGEBTbHCU+songs7z9WJyySTy3G1GaBSbcp6dOVDgTmizouBQbkL4/k+PJUDndsMN7hykDYzvlaVt2HZykiA+sf6EiW2RWPhWThmzo3ACJf30OTK78pUfuh1UcuxHcUz/Ve7V/2pP8wGnD2Imbj78GxKa+hzGQ+7lggExVUpPMCMPAJeFcSjbiLeUVO/muyqoRC6Mr1Y1ZlqL1EcKC9LC6jkXs0XV6jB3DRlr8YHiq6X1GPe0rSzV0/XUain9WY5jO15P8HBU2Pv4Y4hsU8LjVqBbX5r22Xquv6KaLQZVIJUOe2FGzfzYiiIXivYNvtATkxSfY= From 5e9df91e389b245e2e7ff376280274f0c1c42b81 Mon Sep 17 00:00:00 2001 From: saggu Date: Wed, 13 May 2020 13:54:03 -0700 Subject: [PATCH 188/278] travis build initiate --- kgtk/tests/test_triple_generation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kgtk/tests/test_triple_generation.py b/kgtk/tests/test_triple_generation.py index 91cc7e432..a55a46a3a 100644 --- a/kgtk/tests/test_triple_generation.py +++ b/kgtk/tests/test_triple_generation.py @@ -8,6 +8,7 @@ class TestTripleGeneration(unittest.TestCase): def test_truthy_property_triple_generation(self): property_tsv_file = 'data/P10.tsv' wikidata_property_file = 'data/wikidata_properties.tsv' + o = open('data/P10_truthy_tmp.ttl', 'w') generator = TripleGenerator(wikidata_property_file, label_set='label', alias_set='aliases', description_set='descriptions', ignore=True, n=100, truthy=True, use_id=True, From b95567b0ee8bf4edda3ab84ae8af69328149ead1 Mon Sep 17 00:00:00 2001 From: saggu Date: Wed, 13 May 2020 13:57:42 -0700 Subject: [PATCH 189/278] update 
python version for travis --- .travis.yml | 2 +- kgtk/tests/test_triple_generation.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index f1b9f1c01..debb77c4b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,7 @@ sudo: false dist: trusty language: python python: -- 3.7 +- 3.6.8 install: - sudo apt-get update - pip install --upgrade pip diff --git a/kgtk/tests/test_triple_generation.py b/kgtk/tests/test_triple_generation.py index a55a46a3a..91cc7e432 100644 --- a/kgtk/tests/test_triple_generation.py +++ b/kgtk/tests/test_triple_generation.py @@ -8,7 +8,6 @@ class TestTripleGeneration(unittest.TestCase): def test_truthy_property_triple_generation(self): property_tsv_file = 'data/P10.tsv' wikidata_property_file = 'data/wikidata_properties.tsv' - o = open('data/P10_truthy_tmp.ttl', 'w') generator = TripleGenerator(wikidata_property_file, label_set='label', alias_set='aliases', description_set='descriptions', ignore=True, n=100, truthy=True, use_id=True, From bb25904f1342fe07655820250a754b60b120a6f4 Mon Sep 17 00:00:00 2001 From: saggu Date: Wed, 13 May 2020 13:59:32 -0700 Subject: [PATCH 190/278] default python version --- .travis.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index debb77c4b..32ded647b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,7 @@ sudo: false dist: trusty language: python -python: -- 3.6.8 + install: - sudo apt-get update - pip install --upgrade pip From 2a8f901b754996f81f0547336a2c077377b0a0b9 Mon Sep 17 00:00:00 2001 From: saggu Date: Wed, 13 May 2020 14:05:46 -0700 Subject: [PATCH 191/278] install kgtk as well --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 32ded647b..5cdd0a350 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,7 @@ install: - sudo apt-get update - pip install --upgrade pip - pip install -r requirements.txt +- pip install . script: - python -m spacy download en_core_web_sm - cd kgtk/tests From be7cf926b56caf8db76ca64fe3149296cb65b9a5 Mon Sep 17 00:00:00 2001 From: saggu Date: Wed, 13 May 2020 14:13:11 -0700 Subject: [PATCH 192/278] force reinstall numpy, version conflict --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 5cdd0a350..b8bf289b0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,7 @@ install: - sudo apt-get update - pip install --upgrade pip - pip install -r requirements.txt +- pip install --force-reinstall numpy --no-cache - pip install . 
script:
- python -m spacy download en_core_web_sm
- cd kgtk/tests
- python -m unittest discover

From b752504faade3aa1bc64f63c0d04639e751261b7 Mon Sep 17 00:00:00 2001
From: saggu
Date: Wed, 13 May 2020 14:21:12 -0700
Subject: [PATCH 193/278] python=3.7, dist=xenial

---
 .travis.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index b8bf289b0..3aaa00505 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,8 @@
sudo: false
-dist: trusty
+dist: xenial
language: python
+python:
+- 3.7

install:
- sudo apt-get update

From 6152e25bd149aac6c96c001a2ae3482ccf8f7dd7 Mon Sep 17 00:00:00 2001
From: saggu
Date: Wed, 13 May 2020 14:28:56 -0700
Subject: [PATCH 194/278] python==3.7.7

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 3aaa00505..99ecf8d31 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,7 +2,7 @@ sudo: false
dist: xenial
language: python
python:
-- 3.7
+- 3.7.7

install:
- sudo apt-get update

From f3b51b3b9e22ce1d0b080b2a494d94eb17b5c79f Mon Sep 17 00:00:00 2001
From: Craig Milo Rogers
Date: Wed, 13 May 2020 15:45:34 -0700
Subject: [PATCH 195/278] Better fallback options. Better feedback on actual option values.

---
 kgtk/cli/cat.py | 8 +++
 kgtk/cli/clean_data.py | 8 +++
 kgtk/cli/ifexists.py | 25 +++++++---
 kgtk/cli/ifnotexists.py | 25 +++++++---
 kgtk/cli/join.py | 26 ++++++++--
 kgtk/cli/validate.py | 10 +++-
 kgtk/io/kgtkreader.py | 105 ++++++++++++++++++++++++++++--------
 kgtk/join/ifexists.py | 10 +++-
 kgtk/join/kgtkcat.py | 4 ++
 kgtk/join/kgtkjoiner.py | 12 ++++-
 10 files changed, 191 insertions(+), 42 deletions(-)

diff --git a/kgtk/cli/cat.py b/kgtk/cli/cat.py
index e911c98c6..618213a17 100644
--- a/kgtk/cli/cat.py
+++ b/kgtk/cli/cat.py
@@ -53,6 +53,7 @@ def run(input_file_paths: typing.List[Path],

errors_to_stdout: bool = False,
errors_to_stderr: bool = True,
+ show_options: bool = False,
verbose: bool = False,
very_verbose: bool = False,

@@ -71,6 +72,13 @@ def run(input_file_paths: typing.List[Path],
reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs)
value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs)

+ # Show the final option structures for debugging and documentation.
+ if show_options:
+ print("input: %s" % " ".join((str(input_file_path) for input_file_path in input_file_paths)), file=error_file)
+ print("--output-file=%s" % str(output_file_path), file=error_file)
+ reader_options.show(out=error_file)
+ print("=======", file=error_file, flush=True)
+
try:
kc: KgtkCat = KgtkCat(input_file_paths=input_file_paths,
output_path=output_file_path,
diff --git a/kgtk/cli/clean_data.py b/kgtk/cli/clean_data.py
index bb497eea4..aa22c512b 100644
--- a/kgtk/cli/clean_data.py
+++ b/kgtk/cli/clean_data.py
@@ -42,6 +42,7 @@ def run(input_file: typing.Optional[Path],
output_file: typing.Optional[Path],
errors_to_stdout: bool = False,
errors_to_stderr: bool = False,
+ show_options: bool = False,
verbose: bool = False,
very_verbose: bool = False,
**kwargs # Whatever KgtkReaderOptions and KgtkValueOptions want.
@@ -56,6 +57,13 @@ def run(input_file: typing.Optional[Path],
reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs)
value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs)

+ # Show the final option structures for debugging and documentation.
+ if show_options: + print("input: %s" % (str(input_file) if input_file is not None else "-"), file=error_file) + print("output: %s" % (str(output_file) if output_file is not None else "-"), file=error_file) + reader_options.show(out=error_file) + print("=======", file=error_file, flush=True) + if verbose: if input_file is not None: print("Cleaning data from '%s'" % str(input_file), file=error_file, flush=True) diff --git a/kgtk/cli/ifexists.py b/kgtk/cli/ifexists.py index 69718fbc7..288926305 100644 --- a/kgtk/cli/ifexists.py +++ b/kgtk/cli/ifexists.py @@ -50,18 +50,14 @@ def h(msg: str)->str: parser.add_argument("-o", "--output-file", dest="output_kgtk_file", help="The KGTK file to write", type=Path, default=None) - # This argument is retained for compatability with earlier versions of this command. - parser.add_argument( "--error-limit", dest="error_limit", - help=h("The maximum number of errors per input fule (default=%(default)s)"), - default=KgtkReaderOptions.ERROR_LIMIT_DEFAULT) - parser.add_argument( "--field-separator", dest="field_separator", help=h("Separator for multifield keys (default=%(default)s)") , default=IfExists.FIELD_SEPARATOR_DEFAULT) KgtkReader.add_debug_arguments(parser, expert=_expert) - KgtkReaderOptions.add_arguments(parser, mode_options=True, who="input", expert=_expert) - KgtkReaderOptions.add_arguments(parser, mode_options=True, who="filter", expert=_expert) + KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert) + KgtkReaderOptions.add_arguments(parser, mode_options=True, who="input", expert=_expert, defaults=False) + KgtkReaderOptions.add_arguments(parser, mode_options=True, who="filter", expert=_expert, defaults=False) KgtkValueOptions.add_arguments(parser, expert=_expert) def run(input_kgtk_file: typing.Optional[Path], @@ -74,6 +70,7 @@ def run(input_kgtk_file: typing.Optional[Path], errors_to_stdout: bool = False, errors_to_stderr: bool = True, + show_options: bool = False, verbose: bool = False, very_verbose: bool = False, @@ -91,6 +88,20 @@ def run(input_kgtk_file: typing.Optional[Path], filter_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs, who="filter", fallback=True) value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs) + # Show the final option structures for debugging and documentation. 
+ if show_options: + print("input: %s" % (str(input_kgtk_file) if input_kgtk_file is not None else "-"), file=error_file) + if input_keys is not None: + print("--input-keys=%s" % " ".join(input_keys), file=error_file) + print("--filter-on=%s" % (str(filter_kgtk_file) if filter_kgtk_file is not None else "-"), file=error_file) + if filter_keys is not None: + print("--filter-keys=%s" % " ".join(filter_keys), file=error_file) + print("--output-file=%s" % (str(output_kgtk_file) if output_kgtk_file is not None else "-"), file=error_file) + print("--field-separator='%s'" % str(field_separator), file=error_file) + input_reader_options.show(out=error_file, who="input") + filter_reader_options.show(out=error_file, who="filter") + print("=======", file=error_file, flush=True) + try: ie: IfExists = IfExists( input_file_path=input_kgtk_file, diff --git a/kgtk/cli/ifnotexists.py b/kgtk/cli/ifnotexists.py index e49a481f7..dd29b6ec8 100644 --- a/kgtk/cli/ifnotexists.py +++ b/kgtk/cli/ifnotexists.py @@ -49,18 +49,14 @@ def h(msg: str)->str: parser.add_argument( "--filter-keys", "--right-keys", dest="filter_keys", help="The key columns in the filter-on file.", nargs='*') - # This argument is retained for compatability with earlier versions of this command. - parser.add_argument( "--error-limit", dest="error_limit", - help=h("The maximum number of errors per input fule (default=%(default)s)"), - default=KgtkReaderOptions.ERROR_LIMIT_DEFAULT) - parser.add_argument( "--field-separator", dest="field_separator", help=h("Separator for multifield keys"), default=IfExists.FIELD_SEPARATOR_DEFAULT) KgtkReader.add_debug_arguments(parser, expert=_expert) - KgtkReaderOptions.add_arguments(parser, mode_options=True, who="input", expert=_expert) - KgtkReaderOptions.add_arguments(parser, mode_options=True, who="filter", expert=_expert) + KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert) + KgtkReaderOptions.add_arguments(parser, mode_options=True, who="input", expert=_expert, defaults=False) + KgtkReaderOptions.add_arguments(parser, mode_options=True, who="filter", expert=_expert, defaults=False) KgtkValueOptions.add_arguments(parser, expert=_expert) def run(input_kgtk_file: typing.Optional[Path], @@ -73,6 +69,7 @@ def run(input_kgtk_file: typing.Optional[Path], errors_to_stdout: bool = False, errors_to_stderr: bool = True, + show_options: bool = False, verbose: bool = False, very_verbose: bool = False, @@ -90,6 +87,20 @@ def run(input_kgtk_file: typing.Optional[Path], filter_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs, who="filter", fallback=True) value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs) + # Show the final option structures for debugging and documentation. 
+ if show_options: + print("input: %s" % (str(input_kgtk_file) if input_kgtk_file is not None else "-"), file=error_file) + if input_keys is not None: + print("--input-keys=%s" % " ".join(input_keys), file=error_file) + print("--filter-on=%s" % (str(filter_kgtk_file) if filter_kgtk_file is not None else "-"), file=error_file) + if filter_keys is not None: + print("--filter-keys=%s" % " ".join(filter_keys), file=error_file) + print("--output-file=%s" % (str(output_kgtk_file) if output_kgtk_file is not None else "-"), file=error_file) + print("--field-separator='%s'" % str(field_separator), file=error_file) + input_reader_options.show(out=error_file, who="input") + filter_reader_options.show(out=error_file, who="filter") + print("=======", file=error_file, flush=True) + try: ie: IfExists = IfExists( input_file_path=input_kgtk_file, diff --git a/kgtk/cli/join.py b/kgtk/cli/join.py index 7e88b5449..3be24680b 100644 --- a/kgtk/cli/join.py +++ b/kgtk/cli/join.py @@ -19,9 +19,10 @@ def parser(): return { 'help': 'Join two KGTK files', 'description': """Join two KGTK edge files or two KGTK node files. + Join keys are extracted from one or both input files and stored in memory, -then the data is processed in a second pass. -stdin will not work as an input file if two passes are needed. +then the data files are processed in a second pass. stdin will not work as an +input file if join keys are needed from it. The output file contains the union of the columns in the two input files, adjusted for predefined name aliasing. @@ -31,6 +32,16 @@ def parser(): Specify both to get a full outer join (equivalent to cat). Specify neither to get an inner join. +By default, node files are joined on the id column, while edge files are joined +on the node1 column. The label and node2 columns may be added to the edge file +join criteria. Alternatively, the left and right file join columns may be +listed explicitly. + +To join an edge file to a node file, or to join quasi-KGTK files, use the +following option (enable expert mode for more information): + +--mode=NONE + Expert mode provides additional command arguments. """ } @@ -87,8 +98,8 @@ def h(msg: str)->str: # files, or for all files. KgtkReader.add_debug_arguments(parser, expert=_expert) KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert) - KgtkReaderOptions.add_arguments(parser, mode_options=True, who="left", expert=_expert) - KgtkReaderOptions.add_arguments(parser, mode_options=True, who="right", expert=_expert) + KgtkReaderOptions.add_arguments(parser, mode_options=True, who="left", expert=_expert, defaults=False) + KgtkReaderOptions.add_arguments(parser, mode_options=True, who="right", expert=_expert, defaults=False) KgtkValueOptions.add_arguments(parser, expert=_expert) def run(left_file_path: typing.Optional[Path], @@ -106,6 +117,7 @@ def run(left_file_path: typing.Optional[Path], errors_to_stdout: bool = False, errors_to_stderr: bool = True, + show_options: bool = False, verbose: bool = False, very_verbose: bool = False, @@ -144,6 +156,12 @@ def run(left_file_path: typing.Optional[Path], right_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs, who="right", fallback=True) value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs) + # Show the final option structures for debugging and documentation. + if show_options: + # TODO: left_file_path, right_file_path, --join-on-label, etc. 
+ left_reader_options.show(out=error_file, who="left")
+ right_reader_options.show(out=error_file, who="right")
+
try:
kr: KgtkJoiner = KgtkJoiner(
left_file_path=left_file_path,
diff --git a/kgtk/cli/validate.py b/kgtk/cli/validate.py
index 9e16bd90d..299b7018b 100644
--- a/kgtk/cli/validate.py
+++ b/kgtk/cli/validate.py
@@ -40,7 +40,7 @@ def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Names
help="Process only the header of the input file.", action="store_true")

KgtkReader.add_debug_arguments(parser, expert=_expert)
- KgtkReaderOptions.add_arguments(parser, mode_options=True, validate_by_default=True, expert=True)
+ KgtkReaderOptions.add_arguments(parser, mode_options=True, validate_by_default=True, expert=_expert)
KgtkValueOptions.add_arguments(parser, expert=True)

@@ -48,6 +48,7 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]],
errors_to_stdout: bool = False,
errors_to_stderr: bool = False,
header_only: bool = False,
+ show_options: bool = False,
verbose: bool = False,
very_verbose: bool = False,
**kwargs # Whatever KgtkReaderOptions and KgtkValueOptions want.
@@ -65,6 +66,13 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]],
reader_options: KgtkReaderOptions = KgtkReaderOptions.from_dict(kwargs)
value_options: KgtkValueOptions = KgtkValueOptions.from_dict(kwargs)

+ # Show the final option structures for debugging and documentation.
+ if show_options:
+ print("input: %s" % " ".join((str(kgtk_file) for kgtk_file in kgtk_files)), file=error_file)
+ print("--header-only=%s" % str(header_only), file=error_file)
+ reader_options.show(out=error_file)
+ print("=======", file=error_file, flush=True)
+
try:
kgtk_file: typing.Optional[Path]
for kgtk_file in kgtk_files:
diff --git a/kgtk/io/kgtkreader.py b/kgtk/io/kgtkreader.py
index 23f8e6c0c..fdcd882d4 100644
--- a/kgtk/io/kgtkreader.py
+++ b/kgtk/io/kgtkreader.py
@@ -103,6 +103,7 @@ def add_arguments(cls,
mode_options: bool = False,
validate_by_default: bool = False,
expert: bool = False,
+ defaults: bool = True,
who: str = ""):

# This helper function makes it easy to suppress options from
@@ -114,6 +115,25 @@ def h(msg: str)->str:
else:
return SUPPRESS

+ # This helper function decides whether or not to include defaults
+ # in argument declarations. If we plan to make arguments with
+ # prefixes and fallbacks, the fallbacks (the ones without prefixes)
+ # should get default values, while the prefixed arguments should
+ # not get defaults.
+ #
+ # At the present time, boolean arguments can't use fallbacks.
+ #
+ # Note: In obscure circumstances (EnumNameAction, I'm looking at you),
+ # explicitly setting "default=None" may fail, whereas omitting the
+ # "default=" phrase succeeds.
+ #
+ # TODO: continue researching these issues.
+ def d(default: typing.Any)->typing.Mapping[str, typing.Any]:
+ if defaults:
+ return {"default": default}
+ else:
+ return { }
+
prefix1: str = "--" if len(who) == 0 else "--" + who + "-"
prefix2: str = "" if len(who) == 0 else who + "_"
prefix3: str = "" if len(who) == 0 else who + ": "
@@ -124,8 +144,9 @@ def h(msg: str)->str:
fgroup.add_argument(prefix1 + "column-separator",
dest=prefix2 + "column_separator",
help=h(prefix3 + "Column separator (default=)."), # TODO: provide the default with escapes, e.g. \t
- type=str, default=KgtkFormat.COLUMN_SEPARATOR)
+ type=str, **d(default=KgtkFormat.COLUMN_SEPARATOR))

+ # TODO: use an Enum or add choices.
fgroup.add_argument(prefix1 + "compression-type", dest=prefix2 + "compression_type", help=h(prefix3 + "Specify the compression type (default=%(default)s).")) @@ -133,7 +154,7 @@ def h(msg: str)->str: fgroup.add_argument(prefix1 + "error-limit", dest=prefix2 + "error_limit", help=h(prefix3 + "The maximum number of errors to report before failing (default=%(default)s)"), - type=int, default=cls.ERROR_LIMIT_DEFAULT) + type=int, **d(default=cls.ERROR_LIMIT_DEFAULT)) fgroup.add_argument(prefix1 + "gzip-in-parallel", dest=prefix2 + "gzip_in_parallel", @@ -143,13 +164,13 @@ def h(msg: str)->str: fgroup.add_argument(prefix1 + "gzip-queue-size", dest=prefix2 + "gzip_queue_size", help=h(prefix3 + "Queue size for parallel gzip (default=%(default)s)."), - type=int, default=cls.GZIP_QUEUE_SIZE_DEFAULT) + type=int, **d(default=cls.GZIP_QUEUE_SIZE_DEFAULT)) if mode_options: fgroup.add_argument(prefix1 + "mode", dest=prefix2 + "mode", help=h(prefix3 + "Determine the KGTK file mode (default=%(default)s)."), - type=KgtkReaderMode, action=EnumNameAction, default=KgtkReaderMode.AUTO) + type=KgtkReaderMode, action=EnumNameAction, **d(KgtkReaderMode.AUTO)) hgroup: _ArgumentGroup = parser.add_argument_group(h(prefix3 + "Header parsing"), h("Options affecting " + prefix4 + "header parsing")) @@ -162,7 +183,7 @@ def h(msg: str)->str: hgroup.add_argument(prefix1 + "header-error-action", dest=prefix2 + "header_error_action", help=h(prefix3 + "The action to take when a header error is detected. Only ERROR or EXIT are supported (default=%(default)s)."), - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXIT) + type=ValidationAction, action=EnumNameAction, **d(default=ValidationAction.EXIT)) hgroup.add_argument(prefix1 + "skip-first-record", dest=prefix2 + "skip_first_record", @@ -172,7 +193,7 @@ def h(msg: str)->str: hgroup.add_argument(prefix1 + "unsafe-column-name-action", dest=prefix2 + "unsafe_column_name_action", help=h(prefix3 + "The action to take when a column name is unsafe (default=%(default)s)."), - type=ValidationAction, action=EnumNameAction, default=ValidationAction.REPORT) + type=ValidationAction, action=EnumNameAction, **d(default=ValidationAction.REPORT)) lgroup: _ArgumentGroup = parser.add_argument_group(h(prefix3 + "Line parsing"), h("Options affecting " + prefix4 + "data line parsing")) @@ -200,17 +221,17 @@ def h(msg: str)->str: lgroup.add_argument(prefix1 + "blank-required-field-line-action", dest=prefix2 + "blank_required_field_line_action", help=h(prefix3 + "The action to take when a line with a blank node1, node2, or id field (per mode) is detected (default=%(default)s)."), - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + type=ValidationAction, action=EnumNameAction, **d(default=ValidationAction.EXCLUDE)) lgroup.add_argument(prefix1 + "comment-line-action", dest=prefix2 + "comment_line_action", help=h(prefix3 + "The action to take when a comment line is detected (default=%(default)s)."), - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + type=ValidationAction, action=EnumNameAction, **d(default=ValidationAction.EXCLUDE)) lgroup.add_argument(prefix1 + "empty-line-action", dest=prefix2 + "empty_line_action", help=h(prefix3 + "The action to take when an empty line is detected (default=%(default)s)."), - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + type=ValidationAction, action=EnumNameAction, **d(default=ValidationAction.EXCLUDE)) lgroup.add_argument(prefix1 + 
"fill-short-lines", dest=prefix2 + "fill_short_lines", @@ -220,17 +241,17 @@ def h(msg: str)->str: lgroup.add_argument(prefix1 + "invalid-value-action", dest=prefix2 + "invalid_value_action", help=h(prefix3 + "The action to take when a data cell value is invalid (default=%(default)s)."), - type=ValidationAction, action=EnumNameAction, default=ValidationAction.REPORT) + type=ValidationAction, action=EnumNameAction, **d(default=ValidationAction.REPORT)) lgroup.add_argument(prefix1 + "long-line-action", dest=prefix2 + "long_line_action", help=h(prefix3 + "The action to take when a long line is detected (default=%(default)s)."), - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + type=ValidationAction, action=EnumNameAction, **d(default=ValidationAction.EXCLUDE)) lgroup.add_argument(prefix1 + "short-line-action", dest=prefix2 + "short_line_action", help=h(prefix3 + "The action to take when a short line is detected (default=%(default)s)."), - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + type=ValidationAction, action=EnumNameAction, **d(default=ValidationAction.EXCLUDE)) lgroup.add_argument(prefix1 + "truncate-long-lines", dest=prefix2 + "truncate_long_lines", @@ -240,7 +261,7 @@ def h(msg: str)->str: lgroup.add_argument(prefix1 + "whitespace-line-action", dest=prefix2 + "whitespace_line_action", help=h(prefix3 + "The action to take when a whitespace line is detected (default=%(default)s)."), - type=ValidationAction, action=EnumNameAction, default=ValidationAction.EXCLUDE) + type=ValidationAction, action=EnumNameAction, **d(default=ValidationAction.EXCLUDE)) @classmethod # Build the value parsing option structure. @@ -257,9 +278,9 @@ def from_dict(cls, # TODO: Figure out how to type check this method. 
def lookup(name: str, default): prefixed_name = prefix + name - if prefixed_name in d: + if prefixed_name in d and d[prefixed_name] is not None: return d[prefixed_name] - elif fallback and name in d: + elif fallback and name in d and d[name] is not None: return d[name] else: return default @@ -304,6 +325,34 @@ def from_args(cls, )->'KgtkReaderOptions': return cls.from_dict(vars(args), who=who, mode=mode, fallback=fallback) + def show(self, who: str="", out: typing.TextIO=sys.stderr): + prefix: str = "--" if len(who) == 0 else "--" + who + "-" + print("%smode=%s" % (prefix, self.mode.name), file=out) + print("%scolumn-separator='%s'" % (prefix, self.column_separator), file=out) + if self.force_column_names is not None: + print("%sforce_column_names=%s" % (prefix, " ".join(self.force_column_names)), file=out) + print("%sskip_first_record=%s" % (prefix, str(self.skip_first_record)), file=out) + print("%serror-limit=%s" % (prefix, str(self.error_limit)), file=out) + print("%srepair-and-validate-lines=%s" % (prefix, str(self.repair_and_validate_lines)), file=out) + print("%srepair-and-validate-values=%s" % (prefix, str(self.repair_and_validate_values)), file=out) + print("%sempty-line-action=%s" % (prefix, self.empty_line_action.name), file=out) + print("%scomment-line-action=%s" % (prefix, self.comment_line_action.name), file=out) + print("%swhitespace-line-action=%s" % (prefix, self.whitespace_line_action.name), file=out) + print("%sblank-required-field-line-action=%s" % (prefix, self.blank_required_field_line_action.name), file=out) + print("%sshort-line-action=%s" % (prefix, self.short_line_action.name), file=out) + print("%slong-line-action=%s" % (prefix, self.long_line_action.name), file=out) + print("%sheader-error-action=%s" % (prefix, self.header_error_action.name), file=out) + print("%sunsafe-column-name-action=%s" % (prefix, self.unsafe_column_name_action.name), file=out) + print("%sinvalid-value-action=%s" % (prefix, self.invalid_value_action.name), file=out) + print("%sfill-short-lines=%s" % (prefix, str(self.fill_short_lines)), file=out) + print("%struncate-long-lines=%s" % (prefix, str(self.truncate_long_lines)), file=out) + if self.compression_type is not None: + print("%scompression-type=%s" % (prefix, str(self.compression_type)), file=out) + print("%sgzip-in-parallel=%s" % (prefix, str(self.gzip_in_parallel)), file=out) + print("%sgzip-queue-size=%s" % (prefix, str(self.gzip_queue_size)), file=out) + + + DEFAULT_KGTK_READER_OPTIONS: KgtkReaderOptions = KgtkReaderOptions() @@ -959,6 +1008,12 @@ def h(msg: str)->str: # Avoid the argparse bug that prevents these two arguments from having # their help messages suppressed directly. + # + # TODO: Is there a better fix? + # + # TODO: replace --errors-to-stdout and --errors-to-stderr with + # --errors-to=stdout and --errors-to=stderr, using either an enum + # or choices. That will avoid the argparse bug, too. 
if expert: errors_to = egroup.add_mutually_exclusive_group() errors_to.add_argument( "--errors-to-stdout", dest="errors_to_stdout", @@ -968,17 +1023,19 @@ def h(msg: str)->str: help="Send errors to stderr instead of stdout", action="store_true") else: - egroup.add_argument( "--errors-to-stdout", dest="errors_to_stdout", - help=h("Send errors to stdout instead of stderr"), - action="store_true") egroup.add_argument( "--errors-to-stderr", dest="errors_to_stderr", help=h("Send errors to stderr instead of stdout"), action="store_true") + egroup.add_argument( "--errors-to-stdout", dest="errors_to_stdout", + help=h("Send errors to stdout instead of stderr"), + action="store_true") + + egroup.add_argument( "--show-options", dest="show_options", help="Print the options selected (default=%(default)s).", action='store_true') - egroup.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true') + egroup.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages (default=%(default)s).", action='store_true') egroup.add_argument( "--very-verbose", dest="very_verbose", - help=h("Print additional progress messages."), + help=h("Print additional progress messages (default=%(default)s)."), action='store_true') def main(): @@ -1003,7 +1060,7 @@ def main(): KgtkReaderOptions.add_arguments(parser, mode_options=True, validate_by_default=True, expert=True) KgtkValueOptions.add_arguments(parser, expert=True) - args = parser.parse_args() + args: Namespace = parser.parse_args() error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr @@ -1011,6 +1068,12 @@ def main(): reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args) value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) + if args.show_options: + print("--test=%s" % str(args.test), file=error_file) + print("--test-validate=%s" % str(args.test_validate), file=error_file) + reader_options.show(out=error_file) + print("=======", file=error_file, flush=True) + kr: KgtkReader = KgtkReader.open(args.kgtk_file, error_file = error_file, options=reader_options, diff --git a/kgtk/join/ifexists.py b/kgtk/join/ifexists.py index 26f1f1965..48e7d2edc 100644 --- a/kgtk/join/ifexists.py +++ b/kgtk/join/ifexists.py @@ -71,7 +71,7 @@ def get_primary_key_column(self, kr: KgtkReader, who: str)->typing.List[int]: raise ValueError("The node1 column is missing from the %s node file." % who) return [ kr.node1_column_idx ] else: - raise ValueError("The %s file is neither edge nore node." % who) + raise ValueError("The %s file is neither edge nor node." % who) def get_edge_key_columns(self, kr: KgtkReader, who: str)-> typing.List[int]: if not kr.is_edge_file: @@ -97,6 +97,9 @@ def get_key_columns(self, supplied_keys: typing.Optional[typing.List[str]], kr: if supplied_keys is not None and len(supplied_keys) > 0: return self.get_supplied_key_columns(supplied_keys, kr, who) + if not (kr.is_node_file or kr.is_edge_file): + raise ValueError("The %s file is a quasi-KGTK file. Please supply its keys." % who) + if kr.is_node_file or other_kr.is_node_file: return self.get_primary_key_column(kr, who) @@ -230,6 +233,11 @@ def main(): filter_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args, who="filter") value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) + # Show the final option structures for debugging and documentation. 
+ if show_options: + input_reader_options.show(out=error_file, who="input") + filter_reader_options.show(out=error_file, who="filter") + ie: IfExists = IfExists( input_file_path=args.input_file_path, input_keys=args.input_keys, diff --git a/kgtk/join/kgtkcat.py b/kgtk/join/kgtkcat.py index cb0d6ba8d..944c4cabe 100644 --- a/kgtk/join/kgtkcat.py +++ b/kgtk/join/kgtkcat.py @@ -171,6 +171,10 @@ def main(): reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args) value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) + # Show the final option structures for debugging and documentation. + if show_options: + reader_options.show(out=error_file) + kc: KgtkCat = KgtkCat(input_file_paths=args.input_file_paths, output_path=args.output_file_path, reader_options=reader_options, diff --git a/kgtk/join/kgtkjoiner.py b/kgtk/join/kgtkjoiner.py index a1662ae0e..e7d7574da 100644 --- a/kgtk/join/kgtkjoiner.py +++ b/kgtk/join/kgtkjoiner.py @@ -131,7 +131,7 @@ def build_join_idx_list(self, kr: KgtkReader, who: str, join_columns: typing.Opt print("Joining on id (index %s in the %s input file)" % (join_idx, who), file=self.error_file, flush=True) join_idx_list.append(join_idx) else: - raise ValueError("Unknown file type in build_join_idx_list(...)") + raise ValueError("Quasi-KGTK files require an explicit list of join columns") # join_on_label and join_on_node2 may be specified if self.join_on_label or self.join_on_node2: @@ -224,6 +224,11 @@ def ok_to_join(self, left_kr: KgtkReader, right_kr: KgtkReader)->bool: print("Both input files are node files.", file=self.error_file, flush=True) return True + elif (not (left_kr.is_node_file or left_kr.is_edge_file)) or (not(right_kr.is_edge_file or right_kr.is_node_file)): + if self.verbose: + print("One or both input files are quasi-KGTK files.", file=self.error_file, flush=True) + return True + else: print("Cannot join edge and node files.", file=self.error_file, flush=True) return False @@ -375,6 +380,11 @@ def main(): right_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args, who=KgtkJoiner.RIGHT) value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) + # Show the final option structures for debugging and documentation. + if args.show_options: + left_reader_options.show(out=error_file, who="left") + right_reader_options.show(out=error_file, who="right") + ej: KgtkJoiner = KgtkJoiner(left_file_path=args.left_file_path, right_file_path=args.right_file_path, output_path=args.output_file_path, From cfecb9768467c2ea5f5f4e86e1bf341f4b69c74f Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 13 May 2020 16:29:46 -0700 Subject: [PATCH 196/278] Better feedback for separation characters. 
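Swapping str() for repr() when echoing separator options makes whitespace separators visible instead of printing the raw character. For example, in plain Python:

    sep = "\t"
    print("--field-separator='%s'" % str(sep))   # embeds a real tab between the quotes
    print("--field-separator=%s" % repr(sep))    # prints --field-separator='\t', visibly escaped

The hunks below apply this to the separator feedback lines.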
--- kgtk/cli/ifexists.py | 2 +- kgtk/cli/ifnotexists.py | 2 +- kgtk/cli/join.py | 15 +++++++++++++++ kgtk/io/kgtkreader.py | 2 +- 4 files changed, 18 insertions(+), 3 deletions(-) diff --git a/kgtk/cli/ifexists.py b/kgtk/cli/ifexists.py index 288926305..ec24e8104 100644 --- a/kgtk/cli/ifexists.py +++ b/kgtk/cli/ifexists.py @@ -97,7 +97,7 @@ def run(input_kgtk_file: typing.Optional[Path], if filter_keys is not None: print("--filter-keys=%s" % " ".join(filter_keys), file=error_file) print("--output-file=%s" % (str(output_kgtk_file) if output_kgtk_file is not None else "-"), file=error_file) - print("--field-separator='%s'" % str(field_separator), file=error_file) + print("--field-separator=%s" % repr(field_separator), file=error_file) input_reader_options.show(out=error_file, who="input") filter_reader_options.show(out=error_file, who="filter") print("=======", file=error_file, flush=True) diff --git a/kgtk/cli/ifnotexists.py b/kgtk/cli/ifnotexists.py index dd29b6ec8..ba0d31242 100644 --- a/kgtk/cli/ifnotexists.py +++ b/kgtk/cli/ifnotexists.py @@ -96,7 +96,7 @@ def run(input_kgtk_file: typing.Optional[Path], if filter_keys is not None: print("--filter-keys=%s" % " ".join(filter_keys), file=error_file) print("--output-file=%s" % (str(output_kgtk_file) if output_kgtk_file is not None else "-"), file=error_file) - print("--field-separator='%s'" % str(field_separator), file=error_file) + print("--field-separator='%s'" % repr(field_separator), file=error_file) input_reader_options.show(out=error_file, who="input") filter_reader_options.show(out=error_file, who="filter") print("=======", file=error_file, flush=True) diff --git a/kgtk/cli/join.py b/kgtk/cli/join.py index 3be24680b..aa0e8c939 100644 --- a/kgtk/cli/join.py +++ b/kgtk/cli/join.py @@ -159,6 +159,21 @@ def run(left_file_path: typing.Optional[Path], # Show the final option structures for debugging and documentation. if show_options: # TODO: left_file_path, right_file_path, --join-on-label, etc. 
+ print("left: %s" % (str(left_file_path) if left_file_path is not None else "-"), file=error_file)
+ print("right: %s" % (str(right_file_path) if right_file_path is not None else "-"), file=error_file)
+ print("--output-file=%s" % (str(output_file_path) if output_file_path is not None else "-"), file=error_file)
+ print("--left-join=%s" % str(left_join), file=error_file)
+ print("--right-join=%s" % str(right_join), file=error_file)
+ print("--join-on-label=%s" % str(join_on_label), file=error_file)
+ print("--join-on-node2=%s" % str(join_on_node2), file=error_file)
+ if left_join_columns is not None:
+ print("--left-join-columns=%s" % " ".join(left_join_columns), file=error_file)
+ if right_join_columns is not None:
+ print("--right-join-columns=%s" % " ".join(right_join_columns), file=error_file)
+ if prefix is not None:
+ print("--prefix=%s" % str(prefix), file=error_file)
+ print("--field-separator=%s" % repr(field_separator), file=error_file)
+
left_reader_options.show(out=error_file, who="left")
right_reader_options.show(out=error_file, who="right")

diff --git a/kgtk/io/kgtkreader.py b/kgtk/io/kgtkreader.py
index fdcd882d4..fe6b9fea0 100644
--- a/kgtk/io/kgtkreader.py
+++ b/kgtk/io/kgtkreader.py
@@ -328,7 +328,7 @@ def from_args(cls,
def show(self, who: str="", out: typing.TextIO=sys.stderr):
prefix: str = "--" if len(who) == 0 else "--" + who + "-"
print("%smode=%s" % (prefix, self.mode.name), file=out)
- print("%scolumn-separator='%s'" % (prefix, self.column_separator), file=out)
+ print("%scolumn-separator=%s" % (prefix, repr(self.column_separator)), file=out)
if self.force_column_names is not None:
print("%sforce_column_names=%s" % (prefix, " ".join(self.force_column_names)), file=out)
print("%sskip_first_record=%s" % (prefix, str(self.skip_first_record)), file=out)

From cfe8ab172e5ef2836974429e7b7f15f1eea4847c Mon Sep 17 00:00:00 2001
From: Craig Milo Rogers
Date: Wed, 13 May 2020 16:49:02 -0700
Subject: [PATCH 197/278] Show value options when requested.
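With this commit, commands that accept --show-options dump the value-parsing options alongside the reader options. A typical invocation might produce output along these lines (abridged; the exact lines depend on the defaults in kgtkvalueoptions.py):

    kgtk validate --show-options file.tsv
    # ...
    # --allow-month-or-day-zero=False
    # --minimum-valid-year=1583
    # --maximum-valid-year=2100
    # ...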
--- kgtk/cli/clean_data.py | 1 + kgtk/cli/ifexists.py | 1 + kgtk/cli/ifnotexists.py | 1 + kgtk/cli/join.py | 1 + kgtk/cli/validate.py | 1 + kgtk/io/edgereader.py | 5 +++++ kgtk/io/kgtkreader.py | 1 + kgtk/io/nodereader.py | 5 +++++ kgtk/join/ifexists.py | 1 + kgtk/join/kgtkcat.py | 1 + kgtk/join/kgtkjoiner.py | 1 + kgtk/value/kgtkvalueoptions.py | 33 ++++++++++++++++++++++++--------- 12 files changed, 43 insertions(+), 9 deletions(-) diff --git a/kgtk/cli/clean_data.py b/kgtk/cli/clean_data.py index aa22c512b..51ac44f66 100644 --- a/kgtk/cli/clean_data.py +++ b/kgtk/cli/clean_data.py @@ -62,6 +62,7 @@ def run(input_file: typing.Optional[Path], print("input: %s" % (str(input_file) if input_file is not None else "-"), file=error_file) print("output: %s" % (str(output_file) if output_file is not None else "-"), file=error_file) reader_options.show(out=error_file) + value_options.show(out=error_file) print("=======", file=error_file, flush=True) if verbose: diff --git a/kgtk/cli/ifexists.py b/kgtk/cli/ifexists.py index ec24e8104..fcb4e42c6 100644 --- a/kgtk/cli/ifexists.py +++ b/kgtk/cli/ifexists.py @@ -100,6 +100,7 @@ def run(input_kgtk_file: typing.Optional[Path], print("--field-separator=%s" % repr(field_separator), file=error_file) input_reader_options.show(out=error_file, who="input") filter_reader_options.show(out=error_file, who="filter") + value_options.show(out=error_file) print("=======", file=error_file, flush=True) try: diff --git a/kgtk/cli/ifnotexists.py b/kgtk/cli/ifnotexists.py index ba0d31242..1ee8f6534 100644 --- a/kgtk/cli/ifnotexists.py +++ b/kgtk/cli/ifnotexists.py @@ -99,6 +99,7 @@ def run(input_kgtk_file: typing.Optional[Path], print("--field-separator='%s'" % repr(field_separator), file=error_file) input_reader_options.show(out=error_file, who="input") filter_reader_options.show(out=error_file, who="filter") + value_options.show(out=error_file) print("=======", file=error_file, flush=True) try: diff --git a/kgtk/cli/join.py b/kgtk/cli/join.py index aa0e8c939..0ef4a57e9 100644 --- a/kgtk/cli/join.py +++ b/kgtk/cli/join.py @@ -176,6 +176,7 @@ def run(left_file_path: typing.Optional[Path], left_reader_options.show(out=error_file, who="left") right_reader_options.show(out=error_file, who="right") + value_options.show(out=error_file) try: kr: KgtkJoiner = KgtkJoiner( diff --git a/kgtk/cli/validate.py b/kgtk/cli/validate.py index 299b7018b..0282fa144 100644 --- a/kgtk/cli/validate.py +++ b/kgtk/cli/validate.py @@ -71,6 +71,7 @@ def run(kgtk_files: typing.Optional[typing.List[typing.Optional[Path]]], print("input: %s" % " ".join((str(kgtk_file) for kgtk_file in kgtk_files)), file=error_file) print("--header-only=%s" % str(header_only), file=error_file) reader_options.show(out=error_file) + value_options.show(out=error_file) print("=======", file=error_file, flush=True) try: diff --git a/kgtk/io/edgereader.py b/kgtk/io/edgereader.py index 3225c4579..bef4f29ec 100644 --- a/kgtk/io/edgereader.py +++ b/kgtk/io/edgereader.py @@ -89,6 +89,11 @@ def main(): reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args, mode=KgtkReaderMode.EDGE) value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) + if args.show_options: + reader_options.show(out=error_file) + value_options.show(out=error_file) + print("=======", file=error_file, flush=True) + # Force the edge mode: er: EdgeReader = EdgeReader.open_edge_file(args.kgtk_file, error_file=error_file, diff --git a/kgtk/io/kgtkreader.py b/kgtk/io/kgtkreader.py index fe6b9fea0..afdb45c51 100644 --- 
a/kgtk/io/kgtkreader.py +++ b/kgtk/io/kgtkreader.py @@ -1072,6 +1072,7 @@ def main(): print("--test=%s" % str(args.test), file=error_file) print("--test-validate=%s" % str(args.test_validate), file=error_file) reader_options.show(out=error_file) + value_options.show(out=error_file) print("=======", file=error_file, flush=True) kr: KgtkReader = KgtkReader.open(args.kgtk_file, diff --git a/kgtk/io/nodereader.py b/kgtk/io/nodereader.py index bf74beb85..ce389aada 100644 --- a/kgtk/io/nodereader.py +++ b/kgtk/io/nodereader.py @@ -79,6 +79,11 @@ def main(): reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args, mode=KgtkReaderMode.NODE) value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) + if args.show_options: + reader_options.show(out=error_file) + value_options.show(out=error_file) + print("=======", file=error_file, flush=True) + nr: NodeReader = NodeReader.open_node_file(args.kgtk_file, error_file=error_file, options=reader_options, diff --git a/kgtk/join/ifexists.py b/kgtk/join/ifexists.py index 48e7d2edc..509639ffc 100644 --- a/kgtk/join/ifexists.py +++ b/kgtk/join/ifexists.py @@ -237,6 +237,7 @@ def main(): if show_options: input_reader_options.show(out=error_file, who="input") filter_reader_options.show(out=error_file, who="filter") + value_options.show(out=error_file) ie: IfExists = IfExists( input_file_path=args.input_file_path, diff --git a/kgtk/join/kgtkcat.py b/kgtk/join/kgtkcat.py index 944c4cabe..4327b78d1 100644 --- a/kgtk/join/kgtkcat.py +++ b/kgtk/join/kgtkcat.py @@ -174,6 +174,7 @@ def main(): # Show the final option structures for debugging and documentation. if show_options: reader_options.show(out=error_file) + value_options.show(out=error_file) kc: KgtkCat = KgtkCat(input_file_paths=args.input_file_paths, output_path=args.output_file_path, diff --git a/kgtk/join/kgtkjoiner.py b/kgtk/join/kgtkjoiner.py index e7d7574da..2f32d1281 100644 --- a/kgtk/join/kgtkjoiner.py +++ b/kgtk/join/kgtkjoiner.py @@ -384,6 +384,7 @@ def main(): if args.show_options: left_reader_options.show(out=error_file, who="left") right_reader_options.show(out=error_file, who="right") + value_options.show(out=error_file) ej: KgtkJoiner = KgtkJoiner(left_file_path=args.left_file_path, right_file_path=args.right_file_path, diff --git a/kgtk/value/kgtkvalueoptions.py b/kgtk/value/kgtkvalueoptions.py index 5cb8e7526..704048954 100644 --- a/kgtk/value/kgtkvalueoptions.py +++ b/kgtk/value/kgtkvalueoptions.py @@ -4,6 +4,7 @@ from argparse import ArgumentParser, Namespace, SUPPRESS import attr +import sys import typing @attr.s(slots=True, frozen=True) @@ -188,6 +189,10 @@ def from_dict(cls, d: dict, who: str = "")->'KgtkValueOptions': additional_language_codes=d.get(prefix + "additional_language_codes", None), minimum_valid_year=d.get(prefix + "minimum_valid_year", cls.MINIMUM_VALID_YEAR), maximum_valid_year=d.get(prefix + "maximum_valid_year", cls.MAXIMUM_VALID_YEAR), + minimum_valid_lat=d.get(prefix + "minimum_valid_lat", cls.MINIMUM_VALID_LAT), + maximum_valid_lat=d.get(prefix + "maximum_valid_lat", cls.MAXIMUM_VALID_LAT), + minimum_valid_lon=d.get(prefix + "minimum_valid_lon", cls.MINIMUM_VALID_LON), + maximum_valid_lon=d.get(prefix + "maximum_valid_lon", cls.MAXIMUM_VALID_LON), escape_list_separators=d.get(prefix + "escape_list_separators", False)) @classmethod @@ -195,6 +200,23 @@ def from_dict(cls, d: dict, who: str = "")->'KgtkValueOptions': def from_args(cls, args: Namespace, who: str = "")->'KgtkValueOptions': return cls.from_dict(vars(args), who=who) + def show(self, who: 
str="", out: typing.TextIO=sys.stderr): + prefix: str = "--" if len(who) == 0 else "--" + who + "-" + print("%sallow-month-or-day-zero=%s" % (prefix, str(self.allow_month_or_day_zero)), file=out) + print("%srepair-month-or-day-zero=%s" % (prefix, str(self.repair_month_or_day_zero)), file=out) + print("%sallow-language-suffixes=%s" % (prefix, str(self.allow_language_suffixes)), file=out) + print("%sallow-lax-strings=%s" % (prefix, str(self.allow_lax_strings)), file=out) + print("%sallow-lax-lq-strings=%s" % (prefix, str(self.allow_lax_lq_strings)), file=out) + if self.additional_language_codes is not None: + print("%sadditional-language-codes=%s" % (prefix, " ".join(self.additional_language_codes)), file=out) + print("%sminimum-valid-year=%d" % (prefix, self.minimum_valid_year), file=out) + print("%smaximum-valid-year=%d" % (prefix, self.maximum_valid_year), file=out) + print("%sminimum-valid-lat=%f" % (prefix, self.minimum_valid_lat), file=out) + print("%smaximum-valid-lat=%f" % (prefix, self.maximum_valid_lat), file=out) + print("%sminimum-valid-lon=%f" % (prefix, self.minimum_valid_lon), file=out) + print("%smaximum-valid-lon=%f" % (prefix, self.maximum_valid_lon), file=out) + print("%sescape-list-separators=%s" % (prefix, str(self.escape_list_separators)), file=out) + DEFAULT_KGTK_VALUE_OPTIONS: KgtkValueOptions = KgtkValueOptions() def main(): @@ -210,15 +232,8 @@ def main(): # Build the value parsing option structure. value_options: KgtkValueOptions = KgtkValueOptions.from_args(args) - print("allow_month_or_day_zero: %s" % str(value_options.allow_month_or_day_zero)) - print("allow_lax_strings: %s" % str(value_options.allow_lax_strings)) - print("allow_lax_lq_strings: %s" % str(value_options.allow_lax_lq_strings)) - print("allow_language_suffixes: %s" % str(value_options.allow_language_suffixes)) - if value_options.additional_language_codes is None: - print("additional_language_codes: None") - else: - print("additional_language_codes: [ %s ]" % ", ".join(value_options.additional_language_codes)) - + value_options.show() + # Test prefixed value option processing. left_value_options: KgtkValueOptions = KgtkValueOptions.from_args(args, who="left") print("left_allow_month_or_day_zero: %s" % str(left_value_options.allow_month_or_day_zero)) From ed5d0a155e89bac8ac95fa4448a69339ab2ed771 Mon Sep 17 00:00:00 2001 From: Rongpeng Date: Wed, 13 May 2020 17:23:28 -0700 Subject: [PATCH 198/278] fixed a value URI parsing error for Globecoordinate --- kgtk/triple_generator.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kgtk/triple_generator.py b/kgtk/triple_generator.py index c79789d3b..2b6edd5f0 100644 --- a/kgtk/triple_generator.py +++ b/kgtk/triple_generator.py @@ -270,9 +270,10 @@ def generate_normal_triple( elif edge_type == GlobeCoordinate: latitude, longitude = node2[1:].split("/") + latitude = float(latitude) + longitude = float(longitude) object = GlobeCoordinate( - latitude, longitude, 0.0001, globe=StringValue("Earth") - ) + latitude, longitude, 0.0001, globe=Item("Q2")) # earth elif edge_type == QuantityValue: # +70[+60,+80]Q743895 From 384bd9820fb31bd70c1e42cd429c47c58509e37b Mon Sep 17 00:00:00 2001 From: Craig Milo Rogers Date: Wed, 13 May 2020 17:57:25 -0700 Subject: [PATCH 199/278] Use composable optional bools. 
--- kgtk/cli/ifexists.py | 10 ++-- kgtk/cli/ifnotexists.py | 10 ++-- kgtk/cli/join.py | 11 ++-- kgtk/cli/validate.py | 4 +- kgtk/io/kgtkreader.py | 33 ++++------- kgtk/join/ifexists.py | 13 +++-- kgtk/join/kgtkjoiner.py | 19 ++++-- kgtk/utils/argparsehelpers.py | 30 ++++++++++ kgtk/value/kgtkvalueoptions.py | 103 +++++++++++++++------------------ 9 files changed, 134 insertions(+), 99 deletions(-) create mode 100644 kgtk/utils/argparsehelpers.py diff --git a/kgtk/cli/ifexists.py b/kgtk/cli/ifexists.py index fcb4e42c6..4b0d055ad 100644 --- a/kgtk/cli/ifexists.py +++ b/kgtk/cli/ifexists.py @@ -42,13 +42,15 @@ def h(msg: str)->str: parser.add_argument( "input_kgtk_file", nargs="?", help="The KGTK file to filter. May be omitted or '-' for stdin.", type=Path) - parser.add_argument( "--input-keys", "--left-keys", dest="input_keys", help="The key columns in the file being filtered.", nargs='*') + parser.add_argument( "--input-keys", "--left-keys", dest="input_keys", + help="The key columns in the file being filtered (default=None).", nargs='*') - parser.add_argument( "--filter-on", dest="filter_kgtk_file", help="The KGTK file to filter against.", type=Path, required=True) + parser.add_argument( "--filter-on", dest="filter_kgtk_file", help="The KGTK file to filter against (required).", type=Path, required=True) - parser.add_argument( "--filter-keys", "--right-keys", dest="filter_keys", help="The key columns in the filter-on file.", nargs='*') + parser.add_argument( "--filter-keys", "--right-keys", dest="filter_keys", + help="The key columns in the filter-on file (default=None).", nargs='*') - parser.add_argument("-o", "--output-file", dest="output_kgtk_file", help="The KGTK file to write", type=Path, default=None) + parser.add_argument("-o", "--output-file", dest="output_kgtk_file", help="The KGTK file to write (required).", type=Path, default=None) parser.add_argument( "--field-separator", dest="field_separator", help=h("Separator for multifield keys (default=%(default)s)") diff --git a/kgtk/cli/ifnotexists.py b/kgtk/cli/ifnotexists.py index 1ee8f6534..854ef86bd 100644 --- a/kgtk/cli/ifnotexists.py +++ b/kgtk/cli/ifnotexists.py @@ -41,13 +41,15 @@ def h(msg: str)->str: parser.add_argument( "input_kgtk_file", nargs="?", help="The KGTK file to filter. 
May be omitted or '-' for stdin.", type=Path)

- parser.add_argument( "--filter-on", dest="_filter_kgtk_file", help="The KGTK file to filter against.", type=Path, required=True)
+ parser.add_argument( "--input-keys", "--left-keys", dest="input_keys",
+ help="The key columns in the file being filtered (default=None).", nargs='*')

- parser.add_argument("-o", "--output-file", dest="output_kgtk_file", help="The KGTK file to write", type=Path, default=None)
+ parser.add_argument( "--filter-on", dest="_filter_kgtk_file", help="The KGTK file to filter against (required).", type=Path, required=True)

- parser.add_argument( "--input-keys", "--left-keys", dest="input_keys", help="The key columns in the file being filtered.", nargs='*')
+ parser.add_argument( "--filter-keys", "--right-keys", dest="filter_keys",
+ help="The key columns in the filter-on file (default=None).", nargs='*')

- parser.add_argument( "--filter-keys", "--right-keys", dest="filter_keys", help="The key columns in the filter-on file.", nargs='*')
+ parser.add_argument("-o", "--output-file", dest="output_kgtk_file", help="The KGTK file to write (required).", type=Path, default=None)

parser.add_argument( "--field-separator", dest="field_separator",
help=h("Separator for multifield keys"),
diff --git a/kgtk/cli/join.py b/kgtk/cli/join.py
index 0ef4a57e9..8fbed6207 100644
--- a/kgtk/cli/join.py
+++ b/kgtk/cli/join.py
@@ -13,6 +13,7 @@
from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
from kgtk.io.kgtkwriter import KgtkWriter
from kgtk.join.kgtkjoiner import KgtkJoiner
+from kgtk.utils.argparsehelpers import optional_bool
from kgtk.value.kgtkvalueoptions import KgtkValueOptions

def parser():
@@ -71,15 +72,16 @@ def h(msg: str)->str:
parser.add_argument( "--join-on-label", dest="join_on_label",
help="If both input files are edge files, include the label column in the join (default=%(default)s).",
- action='store_true')
+ type=optional_bool, nargs='?', const=True, default=False)

parser.add_argument( "--join-on-node2", dest="join_on_node2",
help="If both input files are edge files, include the node2 column in the join (default=%(default)s).",
- action='store_true')
+ type=optional_bool, nargs='?', const=True, default=False)

parser.add_argument( "--left-file-join-columns", dest="left_join_columns", help="Left file join columns (default=None).", nargs='+')

- parser.add_argument( "--left-join", dest="left_join", help="Perform a left outer join (default=%(default)s).", action='store_true')
+ parser.add_argument( "--left-join", dest="left_join", help="Perform a left outer join (default=%(default)s).",
+ type=optional_bool, nargs='?', const=True, default=False)

parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to write (default=%(default)s).", type=Path, default="-")
@@ -88,7 +90,8 @@ def h(msg: str)->str:
parser.add_argument( "--right-file-join-columns", dest="right_join_columns", help="Right file join columns (default=None).", nargs='+')

- parser.add_argument( "--right-join", dest="right_join", help="Perform a right outer join (default=%(default)s).", action='store_true')
+ parser.add_argument( "--right-join", dest="right_join", help="Perform a right outer join (default=%(default)s).",
+ type=optional_bool, nargs='?', const=True, default=False)

parser.add_argument( "--field-separator", dest="field_separator",
help=h("Separator for multifield keys (default=%(default)s)")
diff --git a/kgtk/cli/validate.py b/kgtk/cli/validate.py
index 0282fa144..7d88e2fa2 100644
--- a/kgtk/cli/validate.py
+++
b/kgtk/cli/validate.py
@@ -18,6 +18,7 @@

from kgtk.cli_argparse import KGTKArgumentParser
from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
+from kgtk.utils.argparsehelpers import optional_bool
from kgtk.value.kgtkvalueoptions import KgtkValueOptions

def parser():
@@ -37,7 +38,8 @@ def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Names
parser.add_argument( "kgtk_files", nargs="*", help="The KGTK file(s) to validate. May be omitted or '-' for stdin.", type=Path)

parser.add_argument( "--header-only", dest="header_only",
- help="Process only the header of the input file.", action="store_true")
+ help="Process only the header of the input file.",
+ type=optional_bool, nargs='?', const=True, default=False)

KgtkReader.add_debug_arguments(parser, expert=_expert)
KgtkReaderOptions.add_arguments(parser, mode_options=True, validate_by_default=True, expert=_expert)
diff --git a/kgtk/io/kgtkreader.py b/kgtk/io/kgtkreader.py
index afdb45c51..9de0bfc8e 100644
--- a/kgtk/io/kgtkreader.py
+++ b/kgtk/io/kgtkreader.py
@@ -30,6 +30,7 @@

from kgtk.kgtkformat import KgtkFormat
from kgtk.io.kgtkbase import KgtkBase
+from kgtk.utils.argparsehelpers import optional_bool
from kgtk.utils.closableiter import ClosableIter, ClosableIterTextIOWrapper
from kgtk.utils.enumnameaction import EnumNameAction
from kgtk.utils.gzipprocess import GunzipProcess
@@ -104,7 +105,8 @@ def add_arguments(cls,
validate_by_default: bool = False,
expert: bool = False,
defaults: bool = True,
- who: str = ""):
+ who: str = "",
+ ):

# This helper function makes it easy to suppress options from
# The help message. The options are still there, and initialize
@@ -121,8 +123,6 @@ def h(msg: str)->str:
# should get default values, while the prefixed arguments should
# not get defaults.
#
- # At the present time, boolean arguments can't use fallbacks.
- #
# Note: In obscure circumstances (EnumNameAction, I'm looking at you),
# explicitly setting "default=None" may fail, whereas omitting the
# "default=" phrase succeeds.
@@ -159,7 +159,7 @@ def d(default: typing.Any)->typing.Mapping[str, typing.Any]: fgroup.add_argument(prefix1 + "gzip-in-parallel", dest=prefix2 + "gzip_in_parallel", help=h(prefix3 + "Execute gzip in parallel (default=%(default)s)."), - action='store_true') + type=optional_bool, nargs='?', const=True, **d(default=False)) fgroup.add_argument(prefix1 + "gzip-queue-size", dest=prefix2 + "gzip_queue_size", @@ -188,7 +188,7 @@ def d(default: typing.Any)->typing.Mapping[str, typing.Any]: hgroup.add_argument(prefix1 + "skip-first-record", dest=prefix2 + "skip_first_record", help=h(prefix3 + "Skip the first record when forcing column names (default=%(default)s)."), - action='store_true') + type=optional_bool, nargs='?', const=True, **d(default=False)) hgroup.add_argument(prefix1 + "unsafe-column-name-action", dest=prefix2 + "unsafe_column_name_action", @@ -201,22 +201,12 @@ def d(default: typing.Any)->typing.Mapping[str, typing.Any]: lgroup.add_argument(prefix1 + "repair-and-validate-lines", dest=prefix2 + "repair_and_validate_lines", help=h(prefix3 + "Repair and validate lines (default=%(default)s)."), - action='store_true', default=validate_by_default) - - lgroup.add_argument(prefix1 + "do-not-repair-and-validate-lines", - dest=prefix2 + "repair_and_validate_lines", - help=h(prefix3 + "Do not repair and validate lines."), - action='store_false') + type=optional_bool, nargs='?', const=True, **d(default=validate_by_default)) lgroup.add_argument(prefix1 + "repair-and-validate-values", dest=prefix2 + "repair_and_validate_values", help=h(prefix3 + "Repair and validate values (default=%(default)s)."), - action='store_true', default=validate_by_default) - - lgroup.add_argument(prefix1 + "do-not-repair-and-validate-values", - dest=prefix2 + "repair-and-validate_values", - help=h(prefix3 + "Do not repair and validate values."), - action='store_false') + type=optional_bool, nargs='?', const=True, **d(default=validate_by_default)) lgroup.add_argument(prefix1 + "blank-required-field-line-action", dest=prefix2 + "blank_required_field_line_action", @@ -236,7 +226,7 @@ def d(default: typing.Any)->typing.Mapping[str, typing.Any]: lgroup.add_argument(prefix1 + "fill-short-lines", dest=prefix2 + "fill_short_lines", help=h(prefix3 + "Fill missing trailing columns in short lines with empty values (default=%(default)s)."), - action='store_true') + type=optional_bool, nargs='?', const=True, **d(default=False)) lgroup.add_argument(prefix1 + "invalid-value-action", dest=prefix2 + "invalid_value_action", @@ -256,7 +246,7 @@ def d(default: typing.Any)->typing.Mapping[str, typing.Any]: lgroup.add_argument(prefix1 + "truncate-long-lines", dest=prefix2 + "truncate_long_lines", help=h(prefix3 + "Remove excess trailing columns in long lines (default=%(default)s)."), - action='store_true') + type=optional_bool, nargs='?', const=True, **d(default=False)) lgroup.add_argument(prefix1 + "whitespace-line-action", dest=prefix2 + "whitespace_line_action", @@ -1050,13 +1040,14 @@ def main(): parser = ArgumentParser() parser.add_argument(dest="kgtk_file", help="The KGTK file to read", type=Path, nargs="?") KgtkReader.add_debug_arguments(parser, expert=True) - parser.add_argument( "--test", dest="test_method", help="The test to perform", + parser.add_argument( "--test", dest="test_method", help="The test to perform (default=%(default)s).", choices=["rows", "concise-rows", "kgtk-values", "concise-kgtk-values", "dicts", "concise-dicts", "kgtk-value-dicts", "concise-kgtk-value-dicts"], default="rows") - parser.add_argument( 
"--test-validate", dest="test_validate", help="Validate KgtkValue objects in test.", action='store_true') + parser.add_argument( "--test-validate", dest="test_validate", help="Validate KgtkValue objects in test (default=%(default)s).", + type=optional_bool, nargs='?', const=True, default=False) KgtkReaderOptions.add_arguments(parser, mode_options=True, validate_by_default=True, expert=True) KgtkValueOptions.add_arguments(parser, expert=True) diff --git a/kgtk/join/ifexists.py b/kgtk/join/ifexists.py index 509639ffc..d25dd6c7b 100644 --- a/kgtk/join/ifexists.py +++ b/kgtk/join/ifexists.py @@ -27,6 +27,7 @@ from kgtk.kgtkformat import KgtkFormat from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions from kgtk.io.kgtkwriter import KgtkWriter +from kgtk.utils.argparsehelpers import optional_bool from kgtk.utils.enumnameaction import EnumNameAction from kgtk.utils.validationaction import ValidationAction from kgtk.value.kgtkvalueoptions import KgtkValueOptions @@ -208,16 +209,18 @@ def main(): parser.add_argument(dest="input_file_path", help="The KGTK file with the input data", type=Path, nargs="?") - parser.add_argument( "--filter-on", dest="filter_file_path", help="The KGTK file with the filter data", type=Path, required=True) + parser.add_argument( "--filter-on", dest="filter_file_path", help="The KGTK file with the filter data (required).", type=Path, required=True) parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to write (default=%(default)s).", type=Path, default="-") - parser.add_argument( "--field-separator", dest="field_separator", help="Separator for multifield keys", default=IfExists.FIELD_SEPARATOR_DEFAULT) + parser.add_argument( "--field-separator", dest="field_separator", help="Separator for multifield keys (default=%(default)s)", + default=IfExists.FIELD_SEPARATOR_DEFAULT) - parser.add_argument( "--invert", dest="invert", help="Invert the test (if not exists).", action='store_true') + parser.add_argument( "--invert", dest="invert", help="Invert the test (if not exists) (default=%(default)s).", + type=optional_bool, nargs='?', const=True, default=False) - parser.add_argument( "--input-keys", dest="input_keys", help="The key columns in the input file.", nargs='*') - parser.add_argument( "--filter-keys", dest="filter_keys", help="The key columns in the filter file.", nargs='*') + parser.add_argument( "--input-keys", dest="input_keys", help="The key columns in the input file (default=None).", nargs='*') + parser.add_argument( "--filter-keys", dest="filter_keys", help="The key columns in the filter file (default=None).", nargs='*') KgtkReader.add_debug_arguments(parser) KgtkReaderOptions.add_arguments(parser, mode_options=True, who="input") diff --git a/kgtk/join/kgtkjoiner.py b/kgtk/join/kgtkjoiner.py index 2f32d1281..659738af5 100644 --- a/kgtk/join/kgtkjoiner.py +++ b/kgtk/join/kgtkjoiner.py @@ -16,6 +16,7 @@ from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions from kgtk.io.kgtkwriter import KgtkWriter from kgtk.join.kgtkmergecolumns import KgtkMergeColumns +from kgtk.utils.argparsehelpers import optional_bool from kgtk.value.kgtkvalueoptions import KgtkValueOptions @attr.s(slots=True, frozen=True) @@ -356,15 +357,25 @@ def main(): parser.add_argument(dest="right_file_path", help="The right KGTK file to join", type=Path) parser.add_argument( "--field-separator", dest="field_separator", help="Separator for multifield keys", default=KgtkJoiner.FIELD_SEPARATOR_DEFAULT) - parser.add_argument( "--join-on-label", dest="join_on_label", 
help="If both input files are edge files, include the label column in the join.", action='store_true') - parser.add_argument( "--join-on-node2", dest="join_on_node2", help="If both input files are edge files, include the node2 column in the join.", action='store_true') + parser.add_argument( "--join-on-label", dest="join_on_label", + help="If both input files are edge files, include the label column in the join (default=%(default)s).", + type=optional_bool, nargs='?', const=True, default=False) + + parser.add_argument( "--join-on-node2", dest="join_on_node2", + help="If both input files are edge files, include the node2 column in the join (default=%(default)s).", + type=optional_bool, nargs='?', const=True, default=False) + parser.add_argument( "--left-file-join-columns", dest="left_join_columns", help="Left file join columns.", nargs='+') - parser.add_argument( "--left-join", dest="left_join", help="Perform a left outer join.", action='store_true') + + parser.add_argument( "--left-join", dest="left_join", help="Perform a left outer join (default=%(default)s).", + type=optional_bool, nargs='?', const=True, default=False) parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to write", type=Path, default=None) parser.add_argument( "--prefix", dest="prefix", help="An optional prefix applied to right file column names in the output file (default=None).") parser.add_argument( "--right-file-join-columns", dest="right_join_columns", help="Right file join columns.", nargs='+') - parser.add_argument( "--right-join", dest="right_join", help="Perform a right outer join.", action='store_true') + + parser.add_argument( "--right-join", dest="right_join", help="Perform a right outer join (default=%(default)s).", + type=optional_bool, nargs='?', const=True, default=False) KgtkReader.add_debug_arguments(parser, expert=True) KgtkReaderOptions.add_arguments(parser, mode_options=True, who=KgtkJoiner.LEFT, expert=True) diff --git a/kgtk/utils/argparsehelpers.py b/kgtk/utils/argparsehelpers.py new file mode 100644 index 000000000..ba09b8370 --- /dev/null +++ b/kgtk/utils/argparsehelpers.py @@ -0,0 +1,30 @@ +"""This argparse type conversion function implements optional boolean arguments. + +--arg +--arg=True +--arg=False + +and other variations. A default value of None is allowed for fallback +argument composition. + +Sample usage: + +parser.add_argument(prefix1 + "gzip-in-parallel", + dest=prefix2 + "gzip_in_parallel", + help=h(prefix3 + "Execute gzip in parallel (default=%(default)s)."), + type=optional_bool, nargs='?', const=True, **d(default=False)) + +""" + +import typing + +def optional_bool(value)->typing.Optional[bool]: + if value is None: + return None + if isinstance(value, bool): + return value + if value.lower() in {'false', 'f', '0', 'no', 'n'}: + return False + elif value.lower() in {'true', 't', '1', 'yes', 'y'}: + return True + raise ValueError(f'{value} is not a valid boolean value') diff --git a/kgtk/value/kgtkvalueoptions.py b/kgtk/value/kgtkvalueoptions.py index 704048954..b4e483a9e 100644 --- a/kgtk/value/kgtkvalueoptions.py +++ b/kgtk/value/kgtkvalueoptions.py @@ -7,6 +7,8 @@ import sys import typing +from kgtk.utils.argparsehelpers import optional_bool + @attr.s(slots=True, frozen=True) class KgtkValueOptions: """ @@ -65,6 +67,7 @@ def add_arguments(cls, who: str = "", desc: str = ".", expert: bool = False, + defaults: bool = True, ): """Add arguments for KgtkValue option processing. 
@@ -90,89 +93,77 @@ def h(msg: str)->str: else: return SUPPRESS + # This helper function decices whether or not to include defaults + # in argument declarations. If we plan to make arguments with + # prefixes and fallbacks, the fallbacks (the ones without prefixes) + # should get defaults value, while the prefixed arguments should + # not get defaults. + # + # Note: In obscure circumstances (EnumNameAction, I'm looking at you), + # explicitly setting "default=None" may fail, whereas omitting the + # "default=" phrase succeeds. + # + # TODO: continue researching these issues. + def d(default: typing.Any)->typing.Mapping[str, typing.Any]: + if defaults: + return {"default": default} + else: + return { } + vgroup = parser.add_argument_group(h(prefix3 + "Data value parsing"), h("Options controlling the parsing and processing of KGTK data values" + desc)) vgroup.add_argument( prefix1 + "additional-language-codes", dest=prefix2 + "additional_language_codes", help=h(prefix3 + "Additional language codes (default=None)."), nargs="*", default=None) - lsgroup= vgroup.add_mutually_exclusive_group() - lsgroup.add_argument( prefix1 + "allow-language-suffixes", dest=prefix2 + "allow_language_suffixes", + vgroup.add_argument( prefix1 + "allow-language-suffixes", dest=prefix2 + "allow_language_suffixes", help=h(prefix3 + "Allow language identifier suffixes starting with a dash (default=%(default)s)."), - action='store_true', default=True) - - lsgroup.add_argument( prefix1 + "disallow-language-suffixes", dest=prefix2 + "allow_language_suffixes", - help=h(prefix3 + "Disallow language identifier suffixes starting with a dash."), - action='store_false') + type=optional_bool, nargs='?', const=True, **d(default=False)) - laxgroup= vgroup.add_mutually_exclusive_group() - laxgroup.add_argument( prefix1 + "allow-lax-strings", dest=prefix2 + "allow_lax_strings", - help=h(prefix3 + "Do not check if double quotes are backslashed inside strings (default=%(default)s)."), - action='store_true', default=False) - laxgroup.add_argument( prefix1 + "disallow-lax-strings", dest=prefix2 + "allow_lax_strings", - help=h(prefix3 + "Check if double quotes are backslashed inside strings."), - action='store_false') + vgroup.add_argument( prefix1 + "allow-lax-strings", dest=prefix2 + "allow_lax_strings", + help=h(prefix3 + "Do not check if double quotes are backslashed inside strings (default=%(default)s)."), + type=optional_bool, nargs='?', const=True, **d(default=False)) - lqgroup= vgroup.add_mutually_exclusive_group() - lqgroup.add_argument( prefix1 + "allow-lax-lq-strings", dest=prefix2 + "allow_lax_lq_strings", - help=h(prefix3 + "Do not check if single quotes are backslashed inside language qualified strings (default=%(default)s)."), - action='store_true', default=False) + vgroup.add_argument( prefix1 + "allow-lax-lq-strings", dest=prefix2 + "allow_lax_lq_strings", + help=h(prefix3 + "Do not check if single quotes are backslashed inside language qualified strings (default=%(default)s)."), + type=optional_bool, nargs='?', const=True, **d(default=False)) - lqgroup.add_argument( prefix1 + "disallow-lax-lq-strings", dest=prefix2 + "allow_lax_lq_strings", - help=h(prefix3 + "Check if single quotes are backslashed inside language qualified strings."), - action='store_false') + vgroup.add_argument( prefix1 + "allow-month-or-day-zero", dest=prefix2 + "allow_month_or_day_zero", + help=h(prefix3 + "Allow month or day zero in dates (default=%(default)s)."), + type=optional_bool, nargs='?', const=True, **d(default=False)) - amd0group= 
vgroup.add_mutually_exclusive_group() - amd0group.add_argument( prefix1 + "allow-month-or-day-zero", dest=prefix2 + "allow_month_or_day_zero", - help=h(prefix3 + "Allow month or day zero in dates (default=%(default)s)."), - action='store_true', default=False) - - amd0group.add_argument( prefix1 + "disallow-month-or-day-zero", dest=prefix2 + "allow_month_or_day_zero", - help=h(prefix3 + "Allow month or day zero in dates."), - action='store_false') - - rmd0group= vgroup.add_mutually_exclusive_group() - rmd0group.add_argument( prefix1 + "repair-month-or-day-zero", dest=prefix2 + "repair_month_or_day_zero", - help=h(prefix3 + "Repair month or day zero in dates (default=%(default)s)."), - action='store_true', default=False) - - rmd0group.add_argument( prefix1 + "no-repair-month-or-day-zero", dest=prefix2 + "repair_month_or_day_zero", - help=h(prefix3 + "Do not repair month or day zero in dates."), - action='store_false') + vgroup.add_argument( prefix1 + "repair-month-or-day-zero", dest=prefix2 + "repair_month_or_day_zero", + help=h(prefix3 + "Repair month or day zero in dates (default=%(default)s)."), + type=optional_bool, nargs='?', const=True, **d(default=False)) vgroup.add_argument( prefix1 + "minimum-valid-year", dest=prefix2 + "minimum_valid_year", help=h(prefix3 + "The minimum valid year in dates (default=%(default)d)."), - type=int, default=cls.MINIMUM_VALID_YEAR) + type=int, **d(default=cls.MINIMUM_VALID_YEAR)) vgroup.add_argument( prefix1 + "maximum-valid-year", dest=prefix2 + "maximum_valid_year", help=h(prefix3 + "The maximum valid year in dates (default=%(default)d)."), - type=int, default=cls.MAXIMUM_VALID_YEAR) + type=int, **d(default=cls.MAXIMUM_VALID_YEAR)) vgroup.add_argument( prefix1 + "minimum-valid-lat", dest=prefix2 + "minimum_valid_lat", - help=h(prefix3 + "The minimum valid latitude (default=%(default)d)."), - type=int, default=cls.MINIMUM_VALID_LAT) + help=h(prefix3 + "The minimum valid latitude (default=%(default)f)."), + type=int, **d(default=cls.MINIMUM_VALID_LAT)) vgroup.add_argument( prefix1 + "maximum-valid-lat", dest=prefix2 + "maximum_valid_lat", - help=h(prefix3 + "The maximum valid latitude (default=%(default)d)."), - type=int, default=cls.MAXIMUM_VALID_LAT) + help=h(prefix3 + "The maximum valid latitude (default=%(default)f)."), + type=int, **d(default=cls.MAXIMUM_VALID_LAT)) vgroup.add_argument( prefix1 + "minimum-valid-lon", dest=prefix2 + "minimum_valid_lon", - help=h(prefix3 + "The minimum valid longitude (default=%(default)d)."), - type=int, default=cls.MINIMUM_VALID_LON) + help=h(prefix3 + "The minimum valid longitude (default=%(default)f)."), + type=int, **d(default=cls.MINIMUM_VALID_LON)) vgroup.add_argument( prefix1 + "maximum-valid-lon", dest=prefix2 + "maximum_valid_lon", - help=h(prefix3 + "The maximum valid longitude (default=%(default)d)."), - type=int, default=cls.MAXIMUM_VALID_LON) - - elsgroup= vgroup.add_mutually_exclusive_group() - elsgroup.add_argument( prefix1 + "escape-list-separators", dest=prefix2 + "escape_list_separators", - help=h(prefix3 + "Escape all list separators instead of splitting on them (default=%(default)s)."), - action='store_true', default=False) + help=h(prefix3 + "The maximum valid longitude (default=%(default)f)."), + type=int, **d(default=cls.MAXIMUM_VALID_LON)) - elsgroup.add_argument( prefix1 + "no-escape-list-separators", dest=prefix2 + "escape_list_separators", - help=h(prefix3 + "Do not escape list separators."), - action='store_false') + vgroup.add_argument( prefix1 + "escape-list-separators", dest=prefix2 + 
"escape_list_separators", + help=h(prefix3 + "Escape all list separators instead of splitting on them (default=%(default)s)."), + type=optional_bool, nargs='?', const=True, **d(default=False)) @classmethod # Build the value parsing option structure. From 6b32b24776e755c5eef8bff9a3fb1298ceaee991 Mon Sep 17 00:00:00 2001 From: greatyyx Date: Thu, 14 May 2020 11:50:14 -0700 Subject: [PATCH 200/278] create kgtk exception auto handler, apply it in filter --- kgtk/cli/filter.py | 8 ++------ kgtk/exceptions.py | 11 ++++++++++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/kgtk/cli/filter.py b/kgtk/cli/filter.py index 23fc3232f..5fa18d9d6 100644 --- a/kgtk/cli/filter.py +++ b/kgtk/cli/filter.py @@ -28,9 +28,8 @@ def add_arguments(parser): def run(datatype, pattern, input, subj_col, pred_col, obj_col): # import modules locally - import socket import sh # type: ignore - from kgtk.exceptions import KGTKException + from kgtk.exceptions import kgtk_exception_auto_handler props=[subj_col, pred_col, obj_col] @@ -62,8 +61,5 @@ def prepare_filter(property, prop_pattern): elif not sys.stdin.isatty(): sh.mlr('--%slite' % datatype, 'filter', filter_str, _in=sys.stdin, _out=sys.stdout, _err=sys.stderr) - except sh.SignalException_SIGPIPE: - # handles SIGPIPE, if it raises to upper level, it will cause another error - pass except Exception as e: - raise KGTKException(e) + kgtk_exception_auto_handler(e) diff --git a/kgtk/exceptions.py b/kgtk/exceptions.py index c70e03797..2d614320f 100644 --- a/kgtk/exceptions.py +++ b/kgtk/exceptions.py @@ -1,6 +1,7 @@ import sys import warnings import traceback +import sh class KGTKException(BaseException): @@ -11,6 +12,14 @@ def __init__(self, message): self.message = message +def kgtk_exception_auto_handler(e: Exception): + if isinstance(e, (sh.SignalException_SIGPIPE, BrokenPipeError)): + return + elif isinstance(e, KGTKException): + raise e + raise KGTKException(KGTKException.message + str(e)) + + class KGTKArgumentParseException(KGTKException): # same as https://docs.python.org/3/library/argparse.html#argparse.ArgumentParser.error return_code = 2 @@ -34,7 +43,7 @@ def __call__(self, func, *args, **kwargs): if return_code != 0: warnings.warn('Please raise exception instead of returning non-zero value') return return_code - except BrokenPipeError: + except (sh.SignalException_SIGPIPE, BrokenPipeError): pass except BaseException: type_, exc_val, exc_tb = sys.exc_info() From 8d6e7534254d2ccd5f7bf1047d63f17aec3b23c4 Mon Sep 17 00:00:00 2001 From: Daniel Garijo Date: Thu, 14 May 2020 18:16:59 -0700 Subject: [PATCH 201/278] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 07bfa8bed..480f8a2d1 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# KGTK: Knowledge Graph Toolkit +# KGTK: Knowledge Graph Toolkit [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3605675.svg)](https://doi.org/10.5281/zenodo.3605675) KGTK is a Python library for easy manipulation with knowledge graphs. It provides a flexible framework that allows chaining of common graph operations, such as: extraction of subgraphs, filtering, computation of graph metrics, validation, cleaning, generating embeddings, and so on. Its principal format is TSV, though we do support a number of other inputs. From f1896aa30b0c7fc7dcd17eaa90140cf0ebfb8d87 Mon Sep 17 00:00:00 2001 From: Daniel Garijo Date: Thu, 14 May 2020 18:27:06 -0700 Subject: [PATCH 202/278] fix doi. 
Initial version of doc --- .readthedocs.yml | 15 ++++++ README.md | 2 +- docs/index.md | 18 +++++++ docs/specification.md | 114 ++++++++++++++++++++++++++++++++++++++++++ mkdocs.yml | 13 +++++ 5 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 .readthedocs.yml create mode 100644 docs/index.md create mode 100644 docs/specification.md create mode 100644 mkdocs.yml diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 000000000..b6cb68e87 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,15 @@ +# Required +version: 2 + +# Build documentation with MkDocs +mkdocs: + configuration: mkdocs.yml + +# Optionally build your docs in additional formats such as PDF and ePub +formats: all + +# Optionally set the version of Python and requirements required to build your docs +python: + version: 3.7 + install: + - requirements: docs/requirements.txt diff --git a/README.md b/README.md index 480f8a2d1..a3ab034d3 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# KGTK: Knowledge Graph Toolkit [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3605675.svg)](https://doi.org/10.5281/zenodo.3605675) +# KGTK: Knowledge Graph Toolkit I](https://zenodo.org/badge/DOI/10.5281/zenodo.3828069.svg)](https://doi.org/10.5281/zenodo.3828069) KGTK is a Python library for easy manipulation with knowledge graphs. It provides a flexible framework that allows chaining of common graph operations, such as: extraction of subgraphs, filtering, computation of graph metrics, validation, cleaning, generating embeddings, and so on. Its principal format is TSV, though we do support a number of other inputs. diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 000000000..547d6ad25 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,18 @@ +# Knowledge Graph Toolkit (KGTK) + +KGTK is a Python library for easy manipulation with knowledge graphs. It provides a flexible framework that allows chaining of common graph operations, such as: extraction of subgraphs, filtering, computation of graph metrics, validation, cleaning, generating embeddings, and so on. Its principal format is TSV, though we do support a number of other inputs. + +## Features + +* Computation of class instances +* Computation of reachable nodes +* Filtering based on property values +* Removal of columns +* Sorting +* Computation of various embeddings +* Cleaning and validation +* Computation of graph metrics +* Joining and concatenation of graphs +* Manipulation of Wikidata data + + diff --git a/docs/specification.md b/docs/specification.md new file mode 100644 index 000000000..daf864a61 --- /dev/null +++ b/docs/specification.md @@ -0,0 +1,114 @@ +The current mapping between OWL and OpenAPI specification (OAS) supported by OBA can be seen below. + +!!! warning + We are currently working on improving the mapping with complex axiomatization of domains and ranges and other property annotations (minimum and maximum cardinality, etc.) + +**Namespaces** used in this document: + + - owl: [http://www.w3.org/2002/07/owl#](http://www.w3.org/2002/07/owl#) + - rdfs: [http://www.w3.org/2000/01/rdf-schema#](http://www.w3.org/2000/01/rdf-schema#) + - skos: [http://www.w3.org/2004/02/skos/core#](http://www.w3.org/2004/02/skos/core#) + - prov: [http://www.w3.org/ns/prov#](http://www.w3.org/ns/prov#) + +## owl:Class + +Each class in the ontology is associated with two paths for the GET operation, one path for POST, PUT and DELETE operations; and a schema. 
For example, consider the class "Plan" from [http://purl.org/net/p-plan](http://purl.org/net/p-plan). The following GET paths would be generated:
+
+```yaml
+/plans:
+  get:
+    description: Gets a list of all instances of Plan (more information in http://purl.org/net/p-plan#Plan)
+    parameters:
+      {...} #omitted for simplicity
+    responses:
+      200:
+        content:
+          application/json:
+            schema:
+              items:
+                $ref: '#/components/schemas/Plan'
+              type: array
+        description: Successful response - returns an array with the instances of Plan.
+        headers:
+          link:
+            description: Information about pagination
+            schema:
+              type: string
+    summary: List all instances of Plan
+```
+
+```yaml
+/plans/{id}:
+  get:
+    description: Gets the details of a given Plan (more information in http://purl.org/net/p-plan#Plan)
+    parameters:
+      {...} #omitted for simplicity, the response is similar to the one above
+```
+And the following Schema would be generated:
+
+```yaml
+Plan:
+  description: A p-plan:Plan is a specific type of prov:Plan. It is composed of smaller steps that use and produce Variables.
+  properties:
+    wasGeneratedBy:
+      {...} #omitted other properties for simplicity.
+```
+
+### rdfs:subClassOf
+
+Subclasses inherit all properties from their respective superclasses. The OpenAPI specification has the `allOf` clause to indicate this behavior. However, this was not supported by any existing generators until very recently, and therefore OBA will iterate through all superclasses to add the appropriate properties for a given schema.
+
+## owl:ObjectProperty
+
+Each object property is added to its corresponding schema definition that uses it as domain. For example, in the P-Plan ontology, `Plan` has a property `isSubPlanOfPlan` which has domain `Plan`. This would be represented as follows in the OpenAPI specification:
+
+```yaml
+Plan:
+  description: A p-plan:Plan is a specific type of prov:Plan. It is composed of smaller steps that use and produce Variables.
+  properties:
+    isSubPlanOfPlan:
+      description: A p-plan:Plan may be a subplan of another bigger p-plan:Plan. p-plan:isSubPlanOfPlan is used to state the link among the two different plans.
+      items:
+        $ref: '#/components/schemas/Plan'
+      nullable: true
+      type: array
+```
+
+## owl:DataTypeProperty
+
+The mapping is similar to that of an object property, except that no schemas will be used as a reference under the `items` field. For example, consider a `dateCreated` property that indicates when an item is created:
+
+```yaml
+dateCreated:
+  description: Creation date of the item
+  items:
+    type: string
+  nullable: true
+  type: array
+```
+
+### rdfs:domain and rdfs:range
+
+For each object and datatype property, OBA will analyze their `rdfs:domain` and `rdfs:range` to assign the property to the right schema (using `rdfs:domain`) and use the appropriate reference or datatype (by inspecting `rdfs:range`). At the moment, cardinality constraints are not taken into account in this mapping.
+
+## Other important considerations
+
+All properties are `nullable` (i.e., optional) and are returned as a list. This is because, from the development perspective, it is easier to deal with lists (even if they have one element) than having to distinguish whether the object returned is a list or not.
+
+Complex unions and intersections are considered of type `object` instead of a particular schema.
+
+## Class and property documentation
+OBA uses `rdfs:comment`, `skos:definition` and `prov:definition` annotations in the ontology for creating definitions of the classes and properties in OBA.
An example can be seen below:
+
+Example:
+```yaml
+Plan:
+  description: A p-plan:Plan is a specific type of prov:Plan. It is composed of smaller steps that use and produce Variables.
+  properties:
+    isSubPlanOfPlan:
+      description: A p-plan:Plan may be a subplan of another bigger p-plan:Plan. p-plan:isSubPlanOfPlan is used to state the link among the two different plans. Note that if p1 is a p-plan:subPlan of p2, p1 will not necessarily be a step of p2. A multistep will represent p1 in p2, and link to p1 with the p-plan.hasStepDecomposition relationship.
+      {...} #Rest of the schema omitted for brevity
+```
+
diff --git a/mkdocs.yml b/mkdocs.yml
new file mode 100644
index 000000000..f737063fe
--- /dev/null
+++ b/mkdocs.yml
@@ -0,0 +1,13 @@
+site_name: KGTK documentation
+nav:
+    - Home: index.md
+    - KGTK file specification: specification.md
+theme:
+    name: material
+
+markdown_extensions:
+    - admonition
+    - codehilite:
+        guess_lang: false
+    - toc:
+        permalink: true

From 211b7c3f50d5907066b3fe3e53448cd30b3f4cd4 Mon Sep 17 00:00:00 2001
From: GreatYYX
Date: Thu, 14 May 2020 18:59:26 -0700
Subject: [PATCH 203/278] fix doi in readme

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index a3ab034d3..beabf0907 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# KGTK: Knowledge Graph Toolkit I](https://zenodo.org/badge/DOI/10.5281/zenodo.3828069.svg)](https://doi.org/10.5281/zenodo.3828069)
+# KGTK: Knowledge Graph Toolkit [![doi](https://zenodo.org/badge/DOI/10.5281/zenodo.3828069.svg)](https://doi.org/10.5281/zenodo.3828069)

 KGTK is a Python library for easy manipulation with knowledge graphs. It provides a flexible framework that allows chaining of common graph operations, such as: extraction of subgraphs, filtering, computation of graph metrics, validation, cleaning, generating embeddings, and so on. Its principal format is TSV, though we do support a number of other inputs.

@@ -98,4 +98,4 @@ More detailed description of the arguments will be added here promptly.
 ### Developer Instructions

-Please refer to [this](README_dev.md)
\ No newline at end of file
+Please refer to [this](README_dev.md)

From c25ab7aa2d88877a30cb67b45bb18d2e4555743e Mon Sep 17 00:00:00 2001
From: Daniel Garijo
Date: Thu, 14 May 2020 19:19:57 -0700
Subject: [PATCH 204/278] Update specification.md

---
 docs/specification.md | 420 +++++++++++++++++++++++++++++++++---------
 1 file changed, 338 insertions(+), 82 deletions(-)

diff --git a/docs/specification.md b/docs/specification.md
index daf864a61..4b776f6e2 100644
--- a/docs/specification.md
+++ b/docs/specification.md
@@ -1,114 +1,370 @@
-The current mapping between OWL and OpenAPI specification (OAS) supported by OBA can be seen below.
+## KGTK File Format
-!!! warning
-    We are currently working on improving the mapping with complex axiomatization of domains and ranges and other property annotations (minimum and maximum cardinality, etc.)
**Authors:** Hans Chalupsky, Craig Roger, Pedro Szekely

**Version:** 2.0

KGTK uses a text-based, columnar file format that aims to be simple, readable, expressive, yet self-describing and easily generatable and parsable by standard tools. The KGTK file design is focused on being able to represent arbitrary knowledge graphs, but can be used to describe any attributed, labeled or unlabeled hypergraph.

## Basic File Structure

**Encoding**: KGTK files are text files that use UTF-8 encoding for Unicode characters.

**Separator characters**: files are TAB-separated multi-column files; values containing TAB characters need to escape them with the `\t` escape sequence.

**Comments**: lines that begin with a #-sign are treated as comments and will be ignored; lines consisting entirely of whitespace will also be ignored.

**Headers**: the first line of each file is interpreted as a header line which needs to list the names of required and optional columns. Column names must be nonblank and unique within a file. Column names must be symbols. Column names should not contain quoted whitespace.

**Newlines and special characters**: each line ends with an end-of-line character or character sequence (such as CR, LF, or CR LF). Text values that need to contain a newline character can encode it via `\n` and/or `\r`. Other escape sequences mirroring those defined by Python are also supported. Backslash can more generally be used to escape characters with special meaning, for example, `\|` to escape a vertical bar in a values list. Leading and trailing whitespace in values, other than inside quoted strings, is disallowed.

**Columns and null values**: each file can have an arbitrary number of columns; however, the number of columns in each content line has to be constant across the file. Specific required columns are described in more detail below. Undefined values can be specified by the empty string, which is a zero-length field (not the empty quoted string).

**Unordered rows**: records in a KGTK file may appear in any order, and may be reordered freely, without changing their semantic meaning. Duplicate records may be created or removed without changing the semantic content of the file. This means that comments and blank lines appearing in a KGTK source file may be removed by certain processing steps that cannot easily preserve them (e.g.
with a sort or join operation).

## Representing Graphs
KGTK defines knowledge graphs (or more generally any attributed graph or hypergraph) as a set of nodes and a set of edges between those nodes. KGTK represents everything of meaning via an edge. Edges themselves can be attributed by having edges asserted about them; thus, KGTK can in fact represent arbitrary hypergraphs. KGTK intentionally does not distinguish attributes or qualifiers on nodes and edges from full-fledged edges; tools operating on KGTK graphs can instead interpret edges differently if they so desire. In KGTK, everything can be a node, and every node can have any type of edge to any other node.

Nodes are described in one or more node files, and edges in one or more edge files. The resulting graph is built from the union of all loaded files. There is some redundancy of representation between node and edge files. In fact, all graphs can be described with just an edge file, and some graphs can be described with just a node file. However, certain aspects can be described more concisely with a node file and others only with an edge file, so both formats are available for use by an application.

Nodes and edges must have unique IDs; however, IDs can be left implicit and will then be system-generated.

## KGTK Data Types
KGTK represents data via nodes and edges between those nodes. Since edges can themselves serve as nodes, those two sets are not disjoint.

KGTK uses two basic data types to represent nodes and edges: symbols and literals. Symbols are names such as `Node42` or `a90b-bc8f`; literals are numbers or quoted strings, for example, `3.1415` or `"John Doe"`. Both symbols and literals may contain internal whitespace (except for unescaped TABs and newline characters).

There is a third type we call structured (or fancy) literals, which are useful to concisely represent things such as dates or locations. For example, `@043.26193/010.92708` represents the location with latitude `043.26193` and longitude `010.92708`. However, this is just shorthand for a location node with latitude and longitude edges leading to those numeric values.

To allow us to easily specify (and parse) an object type without a verbose type declaration or other complex syntactic structure, we adopt the convention that the first character of a value tells us its data type. The table below lists the different sets of first characters and the data types they correspond to, with some examples.

|First Character|Data Type|Examples|
|---------------|---------|--------|
|0-9, +, -, .|Number|1, 42, 3.14e-10, 0.01, .1, 0xff|
|"|String|"John Doe"|
|^, @, ', !|Structured Literal|^10:30, 'Deutsch'@de|
|otherwise|Symbol|Node42, \0ob1|

Note that in the last symbol example the special meaning of 0 was escaped with the backslash character (which does not itself become part of the symbol's name). Without that escape, the value `0ob1` would be interpreted as an illegal octal numeric value.

## Predefined Names
KGTK comes with a small set of predefined column names and edge labels that either need to be used at certain positions in node or edge files, or that are used by KGTK to translate structured literals into their internal representation. The table below lists those names together with their allowable aliases. Aliases are expensive to process; we may want to define a KGTK file profile that excludes aliases.
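As a rough sketch of what alias support implies for an implementation, the Python snippet below normalizes a header line to the canonical predefined names. This is illustrative only, not KGTK's actual reader code, and the alias sets are the ones listed in the table further down:

```python
# Canonical predefined names and their allowable aliases (see the table below).
# Illustrative only; not KGTK's actual implementation.
PREDEFINED_ALIASES = {
    "id": {"id", "ID"},
    "node1": {"node1", "from", "subject"},
    "node2": {"node2", "to", "object"},
    "label": {"label", "predicate", "relation", "relationship"},
}

def normalize_header(columns):
    """Map aliased column names to canonical names, rejecting a header
    that uses two names from the same set of equivalent names."""
    normalized = []
    for column in columns:
        for canonical, aliases in PREDEFINED_ALIASES.items():
            if column in aliases:
                if canonical in normalized:
                    raise ValueError(f"duplicate column for {canonical!r}: {column!r}")
                normalized.append(canonical)
                break
        else:
            normalized.append(column)  # user-defined column, kept unchanged
    return normalized

print(normalize_header(["ID", "from", "predicate", "to", "source"]))
# ['id', 'node1', 'label', 'node2', 'source']
```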
The presence of ID as an alias for id implies that the predefined names are sensitive to case. We might want to consider making column names insensitive to case, although that can also cause processing inefficiencies.

If a predefined name or allowable alias appears as a column name, no other column name may appear from the same set of equivalent names.

|Predefined Name|Allowable Aliases|Description|
|---------------|-----------------|-----------|
|id|ID|Node and edge IDs|
|node1|from, subject|Start node of an edge|
|node2|to, object|End node of an edge|
|label|predicate, relation, relationship|Node or edge label|
|source||Node or edge provenance|
|text, language||Field values for language-qualified strings|
|magnitude, tolerance, unit||Field values for dimensioned numbers|
|latitude, longitude||Field values for locations|
|year, month, day, hour, minute, second, nanosecond, timezone, precision||Field values for times and dates|

## Edge File Format

The edge file is the core representational structure for KGTK graphs. Everything can be specified in the edge file or files. Node files only provide a different point of view that makes the representation of node-centric information more concise.

Edge files specify the set of edges in a knowledge graph. They have three mandatory columns: node1, label, and node2 (or their aliases). The label might be left blank to represent unlabeled graphs [CMR: I am concerned that blank label values may cause syntactic (not semantic) confusion. I think it would be better to use a special value, such as _.]; however, we will ignore lines with blank node1 or node2 (for us that does not correspond to unknown, just missing). [For processing efficiency, we might want to define an edge file profile that disallows comment lines, blank lines, and lines with blank node1 or node2 values.]

An optional edge ID field can be used to name an edge. All additional columns have a user-defined meaning and are optional. Here is a small example edge file:
```
node1 label node2
N1 rdf:type Person
N1 label "Moe"
N2 rdf:type Person
N2 label "Larry"
N3 rdf:type Person
N3 label "Curly"
N1 brotherOf N3
N1 friendOf N2
N1 friendOf N3
N1 diedAtAge 77
```

This file defines three nodes with types and respective labels (all specified via edges), and some relationships between them. We used an RDF-ish type label with an rdf: namespace prefix here, but there is no requirement for that; any other label could have been used. Similarly, type names such as Person could be prefixed with a namespace or use a full URI. Multiple values, as for N1's friends, can be specified via multiple entries or via a special list syntax described below.

Any symbol or literal can serve as a node ID or label, so another representation for this information would be the following:

```
node1 label node2
"Moe" rdf:type Person
"Larry" rdf:type Person
"Curly" rdf:type Person
"Moe" brotherOf "Curly"
"Moe" friendOf "Larry"
"Moe" friendOf "Curly"
77 "death age of" "Moe"
```

The meaning of a column is defined by its column header, so the order of columns does not matter.
The following would be an equivalent representation of the three node types:

```
label node1 node2
rdf:type "Moe" Person
rdf:type "Larry" Person
rdf:type "Curly" Person
```

Additional columns can be used to specify edges about an edge. For example:

```
node1 label node2 creator source
"Moe" rdf:type Person "Hans" Wikipedia
"Larry" rdf:type Person "Hans" Wikipedia
"Curly" rdf:type Person "Hans" Wikipedia
```

Each edge is uniquely identified by its (node1, label, node2) triple (ignoring the order in which these columns were specified in the file). So, additional values about a particular edge can be added by repeating the edge and listing the value. For example:

```
node1 label node2 creator source
"Moe" rdf:type Person "Hans" Wikipedia
"Larry" rdf:type Person "Hans" Wikipedia
"Curly" rdf:type Person "Hans" Wikipedia
# we repeat the edge triple but only list additional
# values where they apply, other columns are left blank:
"Curly" rdf:type Person IMDB
```

To allow us to use edges in both the node1 and node2 positions of an edge, or to use them as arguments in an explicit node1/label/node2 triple, we can name or alias them via an explicit id column. The names or aliases can then be used as stand-ins for the explicit triple. For example:

```
node1 label node2 creator id
"Moe" rdf:type Person "Hans" E1
"Larry" rdf:type Person "Hans" E2
"Curly" rdf:type Person "Hans" E3
E1 source Wikipedia
E2 source Wikipedia
E3 source Wikipedia
E3 source IMDB
# the first creator edge is equivalent to this one:
E1 creator "Hans"
```
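As a hedged illustration of how the triple-based edge identity and the id alias interact, the following standalone Python sketch (not a KGTK tool; the file contents and helper names are made up for this example) expands the extra columns of a TAB-separated edge file into explicit qualifier edges:

```python
import csv
import io

# A made-up, TAB-separated edge file; KGTK quotes are part of the value,
# so csv quoting is disabled below.
EDGE_FILE = 'node1\tlabel\tnode2\tcreator\tid\n"Moe"\trdf:type\tPerson\t"Hans"\tE1\n'

def expand(text):
    """Yield the core triple of each edge, then one explicit edge per
    extra column, keyed by the id alias (or the triple itself)."""
    reader = csv.DictReader(io.StringIO(text), delimiter="\t", quoting=csv.QUOTE_NONE)
    core = {"node1", "label", "node2", "id"}
    for row in reader:
        triple = (row["node1"], row["label"], row["node2"])
        edge_id = row.get("id") or triple  # unnamed edges fall back to the triple
        yield triple
        for column, value in row.items():
            if column not in core and value:
                yield (edge_id, column, value)

for edge in expand(EDGE_FILE):
    print(edge)
# ('"Moe"', 'rdf:type', 'Person')
# ('E1', 'creator', '"Hans"')
```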
- items: - $ref: '#/components/schemas/Plan' - nullable: true - type: array +Columnar edges can themselves be named via IDs, for example: +``` +node1 label node2 creator id +“Moe” rdf:type Person “Hans” E1 +E1 creator “Hans” E11 ``` -## owl:DataTypeProperty +Note that explicit IDs are simply aliases for the internal edge ID based on the triple, they do not replace that ID, they simply point to it. In future versions of KGTK, we might allow edge IDs that are only unique within a file which is OK since they will point to a global ID based on the edge triple. Since edge IDs are simply aliases, an edge can have multiple IDs defined for it, all pointing to the same triple ID. -Similar mapping to an objec property, except that no schemas will be used as reference under the `items` field. For example, consider a `dateCreated` property that indicates when an item is created: +## Multi-valued Edges +As shown above, multi-valued edges can be represented through separate entries in the edge table. Alternatively, there is a list syntax available using the | separator. For example, here is an alternative way to represent the multiple sources for one of the edges: -```yaml -dateCreated: - description: Creation date of the item - items: - type: string - nullable: true - type: array ``` +node1 label node2 creator source +“Curly” rdf:type Person “Hans” Wikipedia|IMDB +``` + +This representation is equivalent to the following: -### rdfs:domain and rdfs:range +``` +node1 label node2 creator source +“Curly” rdf:type Person “Hans” Wikipedia +“Curly” rdf:type Person IMDB +``` +For value lists care must be taken that individual values must either do not contain vertical bars, or if they do, theythat they must be are escaped by backslash escape syntax. -For each object and datatype property, OBA will analyze their `rdfs:domain` and `rdfs:range` to assign the property in the right schema (using `rdfs:domain`) and use the appropriate reference or datatype (by inspecting `rdfs:range`). At the moment, cardinality constraints are not taken into account in this mapping. +List values will provide a valuable conciseness when records are viewed by humans. However, they may impost complexity on tools that use KGTK files. We may want to define a KGTK profile that excludes list values. +Multiple values are combined without ordering using a set semantics, duplicates will simply be ignored. -## Other important considerations +List values are not allowed in node1, label and node2 columns of the edge table. This simplifies parsing and avoids edge IDs being associated with multiple edges. -All properties are `nullable` (i.e., optional) and are returned as a list. This is because from the development perspective, it is easier to deal with lists (even if they have one element) than having to distinguish whether the object returned is a list or not. +## Unlabeled and Undirected Edges +Even though unusual for knowledge graphs, edges might be unlabeled to represent purely structural information more common in standard graph representations. To represent an unlabeled edge, the label column in the edge file can simply be blank. By default, edges are assumed to be directed from node1 to node2. To represent a blank, undirected edge, the special predicate label _ (underscore) can be used. To represent labeled but undirected edges, the edge label needs to start with an _ (underscore), for example, _brotherOf. +Node File Format +Node files allow a more concise node-centric specification of edges. 
They have one mandatory column for the node ID (using the predefined name or its alias(es)). Lines with blank node IDs are ignored. Node files must not contain a node1 column, in order to distinguish node files from edge files, which may contain an id column. We might want to disallow node2 columns from node files, too. All other columns are optional and specify edges where the identified node is node1. Here is a small example that simply adds labels to our three nodes:

```
id label
N1 "Moe"
N2 "Larry"
N3 "Curly"
```

A minimal version of the nodes file above would only contain the id column (e.g., to communicate a set of nodes to some operation). Here is a more elaborate example adding types, creators and sources:

```
id label rdf:type creator source
N1 "Moe" Person "Hans" Wikipedia
N2 "Larry" Person "Hans" Wikipedia
N3 "Curly" Person "Hans" Wikipedia|IMDB
```

The equivalent edge file for the above looks like this. Note that here the creator and source edges are on nodes and not on edges as in our previous examples:

```
node1 label node2
N1 label "Moe"
N1 rdf:type Person
N1 creator "Hans"
N1 source Wikipedia
N2 label "Larry"
N2 rdf:type Person
N2 creator "Hans"
N2 source Wikipedia
N3 label "Curly"
N3 rdf:type Person
N3 creator "Hans"
N3 source Wikipedia
N3 source IMDB
```

This example illustrates that the node table is simply a slightly more concise, node-centric representation that is most useful for dense edges, that is, edges that have values for most or all nodes.

## Edge Collections and Graphs
KGTK does not have a specific graph type to collect or name sets of edges (different from RDF). Instead, edges can be grouped by linking them to collection nodes using the same edge syntax as used for all other edges. For example, the following edge table assigns the three type edges to the collection Stooges via a graph edge each:

```
node1 label node2 graph
"Moe" rdf:type Person Stooges
"Larry" rdf:type Person Stooges
"Curly" rdf:type Person Stooges
```

There is nothing special about the label graph used for those edges; any other name could have been used (for example, memberOf). The above corresponds to the following explicit edge representation:

```
node1 label node2 id
"Moe" rdf:type Person e1
"Larry" rdf:type Person e2
"Curly" rdf:type Person e3
e1 graph Stooges
e2 graph Stooges
e3 graph Stooges
```

By defining collection or graph membership via explicit edges, edges can be in more than one graph.
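To make the equivalence concrete, here is a small hedged sketch (plain Python, not part of KGTK; the row data and ID naming scheme are assumptions for this example) that rewrites a graph column into the explicit membership-edge representation shown above:

```python
import itertools

# The edge table from the example above, as (node1, label, node2, graph) rows.
ROWS = [
    ('"Moe"', "rdf:type", "Person", "Stooges"),
    ('"Larry"', "rdf:type", "Person", "Stooges"),
    ('"Curly"', "rdf:type", "Person", "Stooges"),
]

# Generate an explicit edge ID (e1, e2, ...) for each base edge, then emit
# one membership edge per graph value, mirroring the explicit table above.
counter = itertools.count(1)
base_edges, membership_edges = [], []
for node1, label, node2, graph in ROWS:
    edge_id = f"e{next(counter)}"
    base_edges.append((node1, label, node2, edge_id))
    # Membership edges get an empty id so every line keeps the same columns.
    membership_edges.append((edge_id, "graph", graph, ""))

print("node1\tlabel\tnode2\tid")
for edge in base_edges + membership_edges:
    print("\t".join(edge))
```

Because membership is stated with ordinary edges, adding a second membership edge (for example, `e1 graph OtherGraph`) places the same edge in two graphs.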
+ +To make it possible to define such membership edges about columnar edges, without having to list all of them explicitly, we introduce a special syntax `*