updates for context match and metrics #93

Open · wants to merge 7 commits into base: development
13 changes: 12 additions & 1 deletion tl/cli/context-match.py
@@ -40,6 +40,9 @@ def add_arguments(parser):
parser.add_argument('--ignore-column-name', action='store',
dest='ignore_column_name', default=None,
help='This column is used to consider only a few rows by setting it to 1.')
parser.add_argument('--pseudo-gt-column-name', action='store',
dest='pseudo_gt_column_name', default=None,
help='This column is used to consider only a few rows by setting it to 0.')
parser.add_argument('--context-properties-path', action='store',
dest='context_properties_path', default=None,
help="The path where relevant properties will be stored.")
@@ -48,6 +51,9 @@ def add_arguments(parser):
parser.add_argument('--save-relevant-properties', action='store_true', default=False,
dest='save_relevant_properties',
help="if set, relevant properties are written a file.")
parser.add_argument('--property-relevance-threshold', action='store', type=float,
default=0, dest='property_relevance_threshold',
help='The minimum property score to be used for determining the relevance of a property.')
# output
parser.add_argument('-o', '--output-column-name', action='store', dest='output_column', default="context_score",
help='The output column is the named column of the score for the matches '
@@ -65,15 +71,20 @@ def run(**kwargs):
similarity_string_threshold = kwargs.pop("similarity_string_threshold")
similarity_quantity_threshold = kwargs.pop("similarity_quantity_threshold")
ignore_column_name = kwargs.pop("ignore_column_name")
property_relevance_threshold = kwargs.pop("property_relevance_threshold")
pseudo_gt_column_name = kwargs.pop("pseudo_gt_column_name")

obj = TableContextMatches(context_path=context_file_path, context_dict=None,
input_path=input_file_path,
context_matches_path=None, label_column='label_clean',
ignore_column=ignore_column_name,
pseudo_column=pseudo_gt_column_name,
relevant_properties_file=kwargs['context_properties_path'],
use_relevant_properties=kwargs['use_relevant_properties'],
save_relevant_properties=kwargs['save_relevant_properties'],
string_similarity_threshold=similarity_string_threshold,
quantity_similarity_threshold=similarity_quantity_threshold,
property_relevance_threshold=property_relevance_threshold,
output_column_name=output_column_name)
start = time.time()
result_df = obj.input_df
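For reference, a minimal sketch of how the two options added above might be exercised, assuming only the argparse definitions shown in this diff; the sample column name and threshold value are hypothetical:

import argparse

# Build a parser containing just the two options introduced in this diff;
# the remaining context-match options are omitted for brevity.
parser = argparse.ArgumentParser()
parser.add_argument('--pseudo-gt-column-name', action='store',
                    dest='pseudo_gt_column_name', default=None)
parser.add_argument('--property-relevance-threshold', action='store', type=float,
                    default=0, dest='property_relevance_threshold')

# Hypothetical invocation: use a column named 'pseudo_gt' and keep only
# properties whose score is at least 0.5.
args = parser.parse_args(['--pseudo-gt-column-name', 'pseudo_gt',
                          '--property-relevance-threshold', '0.5'])
print(args.pseudo_gt_column_name, args.property_relevance_threshold)  # pseudo_gt 0.5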
8 changes: 6 additions & 2 deletions tl/cli/metrics.py
@@ -27,17 +27,21 @@ def add_arguments(parser):
parser.add_argument('--tag', action='store', type=str, dest='tag', default='',
help='a tag to use in the output file to identify the results of running the given pipeline')

parser.add_argument('--method', action='store', dest='method',
default="column",
choices=["column", 'cell'],
help="The method for calculating metrics.")

parser.add_argument('input_file', nargs='?', type=argparse.FileType('r'), default=sys.stdin)


def run(**kwargs):
from tl.evaluation import evaluation
import pandas as pd
import time
try:
df = pd.read_csv(kwargs['input_file'], dtype=object)
start = time.time()
odf = evaluation.metrics(kwargs['column'], k=kwargs['k'], df=df, tag=kwargs['tag'], method=kwargs['method'])
end = time.time()
logger = Logger(kwargs["logfile"])
logger.write_to_file(args={
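A minimal sketch of driving the updated metrics entry point directly from Python instead of through the CLI, assuming a candidate file that already carries the usual column, row, column-id, kg_id and evaluation_label columns plus a score column named context_score; the file name is hypothetical:

import pandas as pd
from tl.evaluation import evaluation

# Hypothetical candidate file produced by earlier pipeline steps
# (ground_truth_labeler has already added 'evaluation_label').
df = pd.read_csv('candidates.csv', dtype=object)

# Per-column metrics: the default, matching the previous behaviour.
column_metrics = evaluation.metrics('context_score', df=df, k=5, tag='run-1', method='column')

# Per-cell metrics: the new option wired through --method above.
cell_metrics = evaluation.metrics('context_score', df=df, k=5, tag='run-1', method='cell')

print(column_metrics)
print(cell_metrics)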
105 changes: 58 additions & 47 deletions tl/evaluation/evaluation.py
@@ -16,15 +16,12 @@ def read_csv(file_path, dtype=object):
def ground_truth_labeler(gt_file_path, file_path=None, df=None):
"""
compares each candidate for the input cells with the ground truth value for that cell and adds an evaluation label.

Args:
gt_file_path: ground truth file path.
file_path: input file path
df: input dataframe (alternative to file_path)

Returns: a dataframe with added column `evaluation_label`

"""
if file_path is None and df is None:
raise RequiredInputParameterMissingException(
Expand Down Expand Up @@ -58,20 +55,59 @@ def assign_evaluation_label(row):
return 1
return -1

def calculate_metrics_by_group(cgdf, k, method, col, column, tag):
results = []
# true positive for precision at 1
tp_ps = []

def metrics(column, file_path=None, df=None, k: int = 1, tag=""):
# true positive for recall at k
tp_rs = defaultdict(list)
if method == 'column':
grouped = cgdf.groupby(by=['row'])
else:
grouped = cgdf.groupby(by=['column-id', 'row'])
n = len(grouped)
for key, gdf in grouped:
gdf = gdf.sort_values(by=[column, 'kg_id'], ascending=[False, True]).reset_index()

for i, row in gdf.iterrows():
if float(row['evaluation_label']) == 1 and row[column] == row['max_score']:
tp_ps.append(key)

# this df is sorted by score, so highest ranked candidate is rank 1 and so on...
rank = i + 1
if rank <= k and (row['evaluation_label'] == '1' or row['evaluation_label'] == 1.0):
tp_rs[k].append(key)

precision = float(len(tp_ps)) / float(n)
recall = {k: float(len(each_tp_rs)) / float(n) for k, each_tp_rs in tp_rs.items()}
# sort as k value increasing
recall = {k: v for k, v in sorted(recall.items(), key=lambda x: x[0])}

for _k, each_recall in recall.items():
if precision == 0 and each_recall == 0:
f1_score = 0.0
else:
f1_score = (2 * precision * each_recall) / (precision + each_recall)
results.append({"k": _k,
'f1': f1_score,
'precision': precision,
'recall': each_recall,
'column': col,
'tag': tag})
return results


def metrics(column, file_path=None, df=None, k: int = 1, tag="", method = 'column'):
"""
computes the precision, recall and f1 score for the tl pipeline.

Args:
column: column with ranking score
file_path: input file path
df: input dataframe (alternative to file_path)
k: calculate recall at top k candidates
tag: a tag to use in the output file to identify the results of running the given pipeline
method: 'column' computes metrics per column (default); 'cell' computes metrics per cell by grouping on (column-id, row)

Returns: a dataframe with columns k, f1, precision, recall, column and tag

"""
if file_path is None and df is None:
raise RequiredInputParameterMissingException(
@@ -85,49 +121,24 @@ def metrics(column, file_path=None, df=None, k: int = 1, tag=""):

# replace na to 0.0
df[column] = df[column].astype(float).fillna(0.0)
if method == 'column':
separating_column = 'column'
else:
separating_column = 'column-id'
df['max_score'] = df.groupby(by=[separating_column, 'row'])[column].transform(max)

# relevant df
rdf = df[df['evaluation_label'].astype(float) != 0.0]

if method == 'column':
col_grouped = rdf.groupby(by=['column'])
results = []
for col, cgdf in col_grouped:
col_wise_result = calculate_metrics_by_group(cgdf, k, method, col, column, tag)
results.extend(col_wise_result)

else:
results = calculate_metrics_by_group(rdf, k, method = method, col = "", column = column, tag = tag)

output_df = pd.DataFrame(results)
return output_df
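To illustrate what the new method parameter changes, here is a small hand-built example; the kg_id values, the gt_score column name and all scores are made up for illustration, and the expected numbers follow from the code above:

import pandas as pd
from tl.evaluation import evaluation

# Two cells of the same column, each with two candidates. evaluation_label is
# 1 for the correct candidate and -1 otherwise, as ground_truth_labeler produces.
df = pd.DataFrame([
    {'column': '0', 'row': '0', 'column-id': 'file1-0', 'kg_id': 'Q1', 'gt_score': '0.9', 'evaluation_label': '1'},
    {'column': '0', 'row': '0', 'column-id': 'file1-0', 'kg_id': 'Q2', 'gt_score': '0.4', 'evaluation_label': '-1'},
    {'column': '0', 'row': '1', 'column-id': 'file1-0', 'kg_id': 'Q3', 'gt_score': '0.8', 'evaluation_label': '-1'},
    {'column': '0', 'row': '1', 'column-id': 'file1-0', 'kg_id': 'Q4', 'gt_score': '0.2', 'evaluation_label': '1'},
])

# method='cell' groups by (column-id, row), so each of the two cells is scored
# on its own: the first cell ranks its correct candidate at the top, the second
# does not, but both cells have the correct candidate within the top 2.
# Expected output: precision = 0.5, recall@2 = 1.0, f1 ~ 0.667.
print(evaluation.metrics('gt_score', df=df, k=2, method='cell'))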