updates for context match and metrics #93

Open · wants to merge 7 commits into base: development
13 changes: 12 additions & 1 deletion tl/cli/context-match.py
@@ -40,6 +40,9 @@ def add_arguments(parser):
parser.add_argument('--ignore-column-name', action='store',
dest='ignore_column_name', default=None,
help='This column is used to consider only a few rows by setting it to 1.')
parser.add_argument('--pseudo-gt-column-name', action='store',
dest='pseudo_gt_column_name', default=None,
help='This column is used to consider only a few rows by setting it to 0.')
parser.add_argument('--context-properties-path', action='store',
dest='context_properties_path', default=None,
help="The path where relevant properties will be stored.")
@@ -48,6 +51,9 @@ def add_arguments(parser):
parser.add_argument('--save-relevant-properties', action='store_true', default=False,
dest='save_relevant_properties',
help="if set, relevant properties are written a file.")
parser.add_argument('--property-relevance-threshold', action='store', type=float,
default=0, dest='property_relevance_threshold',
help='The minimum property score to be used for determining the relevance of a property.')
# output
parser.add_argument('-o', '--output-column-name', action='store', dest='output_column', default="context_score",
help='The output column is the named column of the score for the matches '
@@ -65,15 +71,20 @@ def run(**kwargs):
similarity_string_threshold = kwargs.pop("similarity_string_threshold")
similarity_quantity_threshold = kwargs.pop("similarity_quantity_threshold")
ignore_column_name = kwargs.pop("ignore_column_name")
property_relevance_threshold = kwargs.pop("property_relevance_threshold")
pseudo_gt_column_name = kwargs.pop("pseudo_gt_column_name")

obj = TableContextMatches(context_path=context_file_path, context_dict=None,
input_path=input_file_path,
context_matches_path=None, label_column='label_clean',
ignore_column=ignore_column_name,
pseudo_column=pseudo_gt_column_name,
relevant_properties_file=kwargs['context_properties_path'],
use_relevant_properties=kwargs['use_relevant_properties'],
save_relevant_properties=kwargs['save_relevant_properties'],
string_similarity_threshold=similarity_string_threshold,
quantity_similarity_threshold=similarity_quantity_threshold,
property_relevance_threshold=property_relevance_threshold,
output_column_name=output_column_name)
start = time.time()
result_df = obj.input_df
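For reference, a minimal sketch of how the two options added above might be exercised, assuming only the argparse definitions shown in this diff; the sample column name and threshold value are hypothetical:

import argparse

# Build a parser containing just the two options introduced in this diff;
# the remaining context-match options are omitted for brevity.
parser = argparse.ArgumentParser()
parser.add_argument('--pseudo-gt-column-name', action='store',
                    dest='pseudo_gt_column_name', default=None)
parser.add_argument('--property-relevance-threshold', action='store', type=float,
                    default=0, dest='property_relevance_threshold')

# Hypothetical invocation: use a column named 'pseudo_gt' and keep only
# properties whose score is at least 0.5.
args = parser.parse_args(['--pseudo-gt-column-name', 'pseudo_gt',
                          '--property-relevance-threshold', '0.5'])
print(args.pseudo_gt_column_name, args.property_relevance_threshold)  # pseudo_gt 0.5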
8 changes: 6 additions & 2 deletions tl/cli/metrics.py
@@ -27,17 +27,21 @@ def add_arguments(parser):
parser.add_argument('--tag', action='store', type=str, dest='tag', default='',
help='a tag to use in the output file to identify the results of running the given pipeline')

parser.add_argument('--method', action='store', dest='method',
default="column",
choices=["column", 'cell'],
help="The method for calculating metrics.")

parser.add_argument('input_file', nargs='?', type=argparse.FileType('r'), default=sys.stdin)


def run(**kwargs):
from tl.evaluation import evaluation
import pandas as pd
import time
try:
df = pd.read_csv(kwargs['input_file'], dtype=object)
start = time.time()
odf = evaluation.metrics(kwargs['column'], k=kwargs['k'], df=df, tag=kwargs['tag'], method=kwargs['method'])
end = time.time()
logger = Logger(kwargs["logfile"])
logger.write_to_file(args={
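A minimal sketch of driving the updated metrics entry point directly from Python instead of through the CLI, assuming a candidate file that already carries the usual column, row, column-id, kg_id and evaluation_label columns plus a score column named context_score; the file name is hypothetical:

import pandas as pd
from tl.evaluation import evaluation

# Hypothetical candidate file produced by earlier pipeline steps
# (ground_truth_labeler has already added 'evaluation_label').
df = pd.read_csv('candidates.csv', dtype=object)

# Per-column metrics: the default, matching the previous behaviour.
column_metrics = evaluation.metrics('context_score', df=df, k=5, tag='run-1', method='column')

# Per-cell metrics: the new option wired through --method above.
cell_metrics = evaluation.metrics('context_score', df=df, k=5, tag='run-1', method='cell')

print(column_metrics)
print(cell_metrics)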
105 changes: 58 additions & 47 deletions tl/evaluation/evaluation.py
@@ -16,15 +16,12 @@ def read_csv(file_path, dtype=object):
def ground_truth_labeler(gt_file_path, file_path=None, df=None):
"""
compares each candidate for the input cells with the ground truth value for that cell and adds an evaluation label.

Args:
gt_file_path: ground truth file path.
file_path: input file path
df: input dataframe (alternative to file_path)

Returns: a dataframe with added column `evaluation_label`

"""
if file_path is None and df is None:
raise RequiredInputParameterMissingException(
Expand Down Expand Up @@ -58,20 +55,59 @@ def assign_evaluation_label(row):
return 1
return -1

def calculate_metrics_by_group(cgdf, k, method, col, column, tag):
results = []
# true positive for precision at 1
tp_ps = []

def metrics(column, file_path=None, df=None, k: int = 1, tag=""):
# true positive for recall at k
tp_rs = defaultdict(list)
if method == 'column':
grouped = cgdf.groupby(by=['row'])
else:
grouped = cgdf.groupby(by=['column-id', 'row'])
n = len(grouped)
for key, gdf in grouped:
gdf = gdf.sort_values(by=[column, 'kg_id'], ascending=[False, True]).reset_index()

for i, row in gdf.iterrows():
if float(row['evaluation_label']) == 1 and row[column] == row['max_score']:
tp_ps.append(key)

# this df is sorted by score, so highest ranked candidate is rank 1 and so on...
rank = i + 1
if rank <= k and (row['evaluation_label'] == '1' or row['evaluation_label'] == 1.0):
tp_rs[k].append(key)

precision = float(len(tp_ps)) / float(n)
recall = {k: float(len(each_tp_rs)) / float(n) for k, each_tp_rs in tp_rs.items()}
# sort as k value increasing
recall = {k: v for k, v in sorted(recall.items(), key=lambda x: x[0])}

for _k, each_recall in recall.items():
if precision == 0 and each_recall == 0:
f1_score = 0.0
else:
f1_score = (2 * precision * each_recall) / (precision + each_recall)
results.append({"k": _k,
'f1': f1_score,
'precision': precision,
'recall': each_recall,
'column': col,
'tag': tag})
return results


def metrics(column, file_path=None, df=None, k: int = 1, tag="", method = 'column'):
"""
computes the precision, recall and f1 score for the tl pipeline.

Args:
column: column with ranking score
file_path: input file path
df: input dataframe (alternative to file_path)
k: calculate recall at top k candidates
tag: a tag to use in the output file to identify the results of running the given pipeline
method: 'column' computes metrics per column (default); 'cell' computes metrics per cell by grouping on (column-id, row)

Returns: a dataframe with columns k, f1, precision, recall, column and tag

"""
if file_path is None and df is None:
raise RequiredInputParameterMissingException(
@@ -85,49 +121,24 @@ def metrics(column, file_path=None, df=None, k: int = 1, tag=""):

# replace na to 0.0
df[column] = df[column].astype(float).fillna(0.0)
if method == 'column':
separating_column = 'column'
else:
separating_column = 'column-id'
df['max_score'] = df.groupby(by=[separating_column, 'row'])[column].transform(max)

# relevant df
rdf = df[df['evaluation_label'].astype(float) != 0.0]

if method == 'column':
col_grouped = rdf.groupby(by=['column'])
results = []
for col, cgdf in col_grouped:
col_wise_result = calculate_metrics_by_group(cgdf, k, method, col, column, tag)
results.extend(col_wise_result)

else:
results = calculate_metrics_by_group(rdf, k, method = method, col = "", column = column, tag = tag)

output_df = pd.DataFrame(results)
return output_df
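To illustrate what the new method parameter changes, here is a small hand-built example; the kg_id values, the gt_score column name and all scores are made up for illustration, and the expected numbers follow from the code above:

import pandas as pd
from tl.evaluation import evaluation

# Two cells of the same column, each with two candidates. evaluation_label is
# 1 for the correct candidate and -1 otherwise, as ground_truth_labeler produces.
df = pd.DataFrame([
    {'column': '0', 'row': '0', 'column-id': 'file1-0', 'kg_id': 'Q1', 'gt_score': '0.9', 'evaluation_label': '1'},
    {'column': '0', 'row': '0', 'column-id': 'file1-0', 'kg_id': 'Q2', 'gt_score': '0.4', 'evaluation_label': '-1'},
    {'column': '0', 'row': '1', 'column-id': 'file1-0', 'kg_id': 'Q3', 'gt_score': '0.8', 'evaluation_label': '-1'},
    {'column': '0', 'row': '1', 'column-id': 'file1-0', 'kg_id': 'Q4', 'gt_score': '0.2', 'evaluation_label': '1'},
])

# method='cell' groups by (column-id, row), so each of the two cells is scored
# on its own: the first cell ranks its correct candidate at the top, the second
# does not, but both cells have the correct candidate within the top 2.
# Expected output: precision = 0.5, recall@2 = 1.0, f1 ~ 0.667.
print(evaluation.metrics('gt_score', df=df, k=2, method='cell'))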