#!/usr/bin/env python3
"""List the features which occur in the best parse for each key.

Origin: Disambiguation/features_in_best_parse.py, added by commit
4f9c123dce1f0f7f5c3cd8fe4f14484dd09f5bb7 ("list features which occur in
best parse", Gertjan van Noord, 2024-02-02).

Input is read from standard input, one record per line, with fields
separated by '#':

    key#ilen#iscore#features

* ``iscore`` is five '|'-separated fields (see :func:`score`).
* ``features`` is a '|'-separated list of ``count@feature`` pairs; it may
  itself contain '#' characters.

Consecutive lines with the same key form a group.  For every group the
features of the highest-scoring line are written to standard output, one
feature per line.  Malformed lines are reported on standard error and
skipped.
"""

import sys
import errno


def score(iscore):
    """Return the score (a percentage, 100 is best) encoded in *iscore*.

    *iscore* must consist of exactly five '|'-separated fields:
    ``overlap|correct|system|postaglemma|postaglemma_total``.  Only the
    first three are used; they must be integers.

    The score is ``100 * (1 - (n - overlap) / n)`` with
    ``n = max(correct, system)``, or 100 when ``n`` is zero.

    Raises:
        ValueError: if *iscore* does not have five fields or one of the
            used fields is not an integer.
    """
    overlap, correct, system, _postaglemma, _postaglemma_total = \
        iscore.split('|')
    n = max(int(correct), int(system))
    pen = n - int(overlap)
    if n:
        return 100 * (1 - pen / n)
    return 100


def printit(features):
    """Print the feature name of every ``count@feature`` pair in *features*.

    *features* is a '|'-separated list of pairs.  Everything after the first
    '@' of a pair is the feature name (which may itself contain '@'); a pair
    without '@' yields an empty name.

    A failure to write one feature is reported on standard error and the
    remaining features are still processed, except for a broken pipe
    (errno EPIPE), which is re-raised so that the caller stops writing.
    """
    for pair in features.split('|'):
        _count, _sep, feat = pair.partition('@')
        try:
            print(feat)
        except Exception as e:
            # Not every exception carries an ``errno`` attribute, so it
            # must not be accessed unconditionally.
            if getattr(e, 'errno', None) == errno.EPIPE:
                raise
            print(pair, file=sys.stderr)
            print(e, file=sys.stderr)


def main():
    """Read records from stdin and print the features of each best parse.

    Lines are grouped by their key (consecutive lines with equal keys).  Of
    each group the first line with the highest score is selected and its
    features are printed with :func:`printit`.  Lines that cannot be parsed
    are reported on standard error and otherwise ignored; they never change
    the state of the current group.
    """
    prev_key = ""
    best_score = 0
    best_features = ""

    for line in sys.stdin:
        # Parse and score the complete line before touching any state, so
        # that a malformed line cannot leave a group half-initialised.
        try:
            fields = line.rstrip().split('#')  # (key,ilen,iscore,features)
            key = fields[0]
            iscore = fields[2]
            features = '#'.join(fields[3:])
            this_score = score(iscore)
        except (IndexError, ValueError) as e:
            print("ignoring line {}".format(line[:70]), file=sys.stderr)
            print(e, file=sys.stderr)
            continue

        if key != prev_key:
            # A new group starts: flush the result of the previous one.
            if prev_key:
                printit(best_features)
            prev_key = key
            best_features = features
            best_score = this_score
        elif this_score > best_score:
            best_features = features
            best_score = this_score

    # Flush the last group; nothing to do if no line was accepted.
    if prev_key:
        printit(best_features)


if __name__ == "__main__":
    main()