forked from matejklemen/CLARINprojekt2024-koreferencnost
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
a037113
commit 2e2f947
Showing
76 changed files
with
6,708 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
FROM python:3.8.11-bullseye | ||
|
||
COPY evaluation_scripts/eval_coref149/scorer ./scorer | ||
COPY evaluation_scripts/eval_coref149/ua-scorer.py . | ||
COPY evaluation_scripts/eval_coref149/evaluate_corefud.py . | ||
COPY evaluation_scripts/eval_coref149/evaluate.py . | ||
COPY evaluation_scripts/eval_coref149/requirements.txt . | ||
RUN pip install --no-cache-dir -r requirements.txt | ||
|
||
COPY run.py . | ||
|
||
ENTRYPOINT ["python3", "run.py"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
The MIT License (MIT) | ||
|
||
Copyright 2018 Nafise Sadat Moosavi (ns.moosavi at gmail dot com) | ||
Copyright 2021 Juntao Yu (juntao.cn at gmail dot com) | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# eval_coref149 slobench evaluation script | ||
|
||
All commands should be run from the root directory of the repository. | ||
|
||
## Build docker image (from the root directory of this repo): | ||
|
||
``` | ||
docker buildx build --platform linux/amd64 -t eval:eval_coref149 -f evaluation_scripts/eval_coref149/Dockerfile . | ||
``` | ||
|
||
## Run mock evaluation (from the root directory of this repo) | ||
|
||
``` | ||
docker run -it --name eval-container_coref149 --rm \ | ||
-v $PWD/evaluation_scripts/eval_coref149/sample_ground_truth.zip:/ground_truth.zip \ | ||
-v $PWD/evaluation_scripts/eval_coref149/sample_submission.zip:/submission.zip \ | ||
eval:eval_coref149 ground_truth.zip submission.zip | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import os | ||
|
||
from evaluate_corefud import call_scorer | ||
|
||
|
||
def evaluate(data_ground_truth_path, data_submission_path):
    """Score a coref149 submission against the ground truth.

    Args:
        data_ground_truth_path: directory containing ``coref149.conllu``.
        data_submission_path: directory containing ``submission.conllu``.

    Returns:
        dict mapping metric names (e.g. ``F1(muc)``, ``conll``) to floats,
        as produced by ``call_scorer``.

    Raises:
        Exception: wraps any error raised during metric calculation; the
            original exception is chained as ``__cause__``.
    """
    try:
        # The script is just glue code around ufal/corefud-scorer
        metrics = call_scorer(os.path.join(".", data_ground_truth_path, "coref149.conllu"),
                              os.path.join(".", data_submission_path, "submission.conllu"))
        return metrics
    except Exception as e:
        # Chain the original exception so the full traceback is preserved
        # (the original code dropped it by re-raising without `from e`).
        raise Exception(f'Exception in metric calculation: {e}') from e
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
import importlib | ||
|
||
from scorer.corefud.reader import CorefUDReader | ||
from scorer.eval import evaluator | ||
uascorer = importlib.import_module("ua-scorer") | ||
|
||
|
||
def call_scorer(ref_file, pred_file):
    """Run the ufal/corefud-scorer on a key/system CoNLL-U file pair.

    Returns a dict with Precision/Recall/F1 per metric, plus the averaged
    "conll" score (mean of the MUC, B-cubed and CEAF-e F1 values).
    """
    scorer_args = {
        "key_file": ref_file,
        "sys_file": pred_file,
        "metrics": ['muc', 'bcub', 'ceafe', 'ceafm', 'blanc', 'lea', 'mor'],
        "keep_singletons": False,
        "match": "head",
        "zero_match_method": "dependent",
        "format": "corefud",
        "keep_split_antecedents": False,
        "keep_zeros": True,
        "evaluate_discourse_deixis": False,
        "only_split_antecedent": False,
        "allow_boundary_crossing": False,
        "np_only": False,
        "remove_nested_mentions": False,
        "shared_task": None,
    }
    # NOTE(review): process_arguments() appears to mutate scorer_args in place
    # (e.g. resolving the metric names into (name, callable) pairs consumed by
    # the loop below) — confirm against ua-scorer.py.
    uascorer.process_arguments(scorer_args)

    reader = CorefUDReader(**scorer_args)
    reader.get_coref_infos(scorer_args["key_file"], scorer_args["sys_file"])

    doc_infos = (reader.doc_discourse_deixis_infos
                 if scorer_args['evaluate_discourse_deixis']
                 else reader.doc_coref_infos)

    results = {}
    conll_f1_total = 0
    conll_parts = 0
    for metric_name, metric_fn in scorer_args["metrics"]:
        recall, precision, f1 = evaluator.evaluate_documents(
            doc_infos,
            metric_fn,
            beta=1,
            only_split_antecedent=scorer_args['only_split_antecedent'])

        results[f"Precision({metric_name})"] = precision
        results[f"Recall({metric_name})"] = recall
        results[f"F1({metric_name})"] = f1

        if metric_name in ("muc", "bcub", "ceafe"):
            conll_f1_total += f1
            conll_parts += 1

    # The CoNLL score is reported only when all three component metrics ran.
    if conll_parts == 3:
        results["conll"] = conll_f1_total / 3

    return results
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
numpy | ||
scipy>=0.17.0 | ||
pytest | ||
udapi>=0.3.0 |
Binary file not shown.
Binary file not shown.
Empty file.
Empty file.
184 changes: 184 additions & 0 deletions
184
Benchmarking_SloBENCH/eval_coref149/scorer/base/mention.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,184 @@ | ||
class Mention:
    """A (possibly discontinuous) mention: an ordered list of word indices.

    The matching regime is chosen at construction time:

    * ``matching="head"``: two mentions are equal only if both their spans
      AND their min/head sets coincide;
    * any other value: equal spans alone are sufficient.

    Equality and hashing are kept mutually consistent in both regimes.
    """

    def __init__(self, matching="exact"):
        # Only the properties that might be used outside the Mention class are
        # declared here; each gets a default value so no error occurs even if
        # a specific input format never sets it.
        self._words = []                     # ordered word indices of the span
        self._wordsset = set()               # same indices, as a set
        self._minset = set()                 # MIN span / head word indices
        self._is_referring = True            # False for non-referring mentions
        self._is_split_antecedent = False    # True for split-antecedents
        self._split_antecedent_sets = set()  # member clusters of a split-antecedent
        self._is_zero = False                # True for zero (elided) mentions
        # With "head" matching, two mentions are considered the same only if
        # their spans as well as their min sets are the same; for the
        # remaining matching types matching spans are sufficient.
        if matching == "head":
            self._eq_match = self._super_exact_match
            self._hash_match = self._super_exact_match_hash
        else:
            self._eq_match = self._exact_match
            self._hash_match = self._exact_match_hash

    ############## Properties ###############

    @property
    def words(self):
        return self._words

    @property
    def start(self):
        # First word index of the span.
        return self._words[0]

    @property
    def end(self):
        # Last word index of the span.
        return self._words[-1]

    @property
    def is_zero(self):
        return self._is_zero

    @property
    def is_referring(self):
        return self._is_referring

    @property
    def is_split_antecedent(self):
        return self._is_split_antecedent

    @property
    def split_antecedent_sets(self):
        return self._split_antecedent_sets

    ############## Operators ###############

    def __getitem__(self, i):
        return self._words[i]

    def __len__(self):
        return len(self._words)

    def __eq__(self, other):
        return self._eq_match(other)

    def __ne__(self, other):
        # BUGFIX: propagate NotImplemented instead of negating it —
        # `not NotImplemented` warns since Python 3.9 and raises TypeError
        # from Python 3.12.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __lt__(self, other):
        # Order by start index, then end index, then span length.
        if isinstance(other, self.__class__):
            if self._words[0] == other._words[0]:
                if self._words[-1] == other._words[-1]:
                    return len(self._words) < len(other._words)
                return self._words[-1] < other._words[-1]
            return self._words[0] < other._words[0]
        return NotImplemented

    def __le__(self, other):
        # BUGFIX: short-circuit on NotImplemented before using it in a
        # boolean `or` (TypeError from Python 3.12 otherwise).
        lt = self.__lt__(other)
        if lt is NotImplemented:
            return NotImplemented
        return lt or self.__eq__(other)

    def __hash__(self):
        if self.is_split_antecedent:
            return hash(frozenset(self.split_antecedent_sets))
        return self._hash_match()

    def __str__(self):
        if self.is_split_antecedent:
            return "({:s})".format(",".join([str(cl[0]) for cl in self.split_antecedent_sets]))
        # Words belonging to the min/head set are marked with a trailing '*'.
        return "({:s})".format(
            ",".join([str(w) + "*" if self._minset and w in self._minset else str(w) for w in self._words]))

    def __repr__(self):
        return str(self)

    def intersection(self, other):
        """Return the word indices shared with *other* ([] if spans are disjoint)."""
        if isinstance(other, self.__class__):
            # Quick reject when the spans cannot overlap at all.
            if self._words[0] > other._words[-1] or \
                    other._words[0] > self._words[-1]:
                return []
            return self._wordsset.intersection(other._wordsset)
        return NotImplemented

    ############## Matching types #################

    # Both the mention span and its min set must be matched exactly.
    def _super_exact_match(self, other):
        if not isinstance(other, self.__class__):
            return NotImplemented

        # For split-antecedents we check that all the members are the same.
        if self.is_split_antecedent or other.is_split_antecedent:
            return self.split_antecedent_sets == other.split_antecedent_sets

        # Check if the mention spans are the same (order-sensitive).
        # TODO rewrite using _wordsset
        if len(self._words) != len(other._words):
            return False
        if not all(self_w == other_w for self_w, other_w in zip(self._words, other._words)):
            return False

        # Check if the min spans / heads are the same.
        return self._minset == other._minset

    # Only the mention span must be matched.
    def _exact_match(self, other):
        if not isinstance(other, self.__class__):
            # BUGFIX: the original fell through here and implicitly returned
            # None; return NotImplemented like _super_exact_match does.
            return NotImplemented
        # For split-antecedents we check that all the members are the same.
        if self.is_split_antecedent or other.is_split_antecedent:
            return self.split_antecedent_sets == other.split_antecedent_sets
        if len(self._words) != len(other._words):
            return False
        return all(self_w == other_w for self_w, other_w in zip(self._words, other._words))

    def match_score(self, other, matching):
        """Similarity in [0, 1] between key mention *self* and sys mention *other*."""
        if not isinstance(other, self.__class__):
            return NotImplemented
        if matching == "zero-dependent":
            return self.zero_dependent_match_score(other)
        if matching == "partial-craft":
            return self.craft_partial_match_score(other)
        if matching == "partial-corefud":
            return self.corefud_partial_match_score(other)
        if matching == "head":
            return self.head_match_score(other)
        # Exact match: all-or-nothing.
        if self.__eq__(other):
            return 1.0
        return 0.0

    # Default (with MIN tag), similar to CorefUD: the response may be a part
    # of the key but must include all the words in MIN (head); if that holds,
    # the score is the proportion of common words
    # (num_of_common_words / total_words_in_key), otherwise 0.
    # self = key mention, other = sys mention
    def corefud_partial_match_score(self, other):
        if self._minset and self._minset.issubset(other._wordsset) and other._wordsset.issubset(
                self._wordsset):
            return len(self._wordsset & other._wordsset) * 1.0 / len(self._wordsset)
        return 0.0

    # CRAFT (with craft tag), same as the CRAFT 2019 CR task: the first key
    # span is used as the MIN and any response overlapping with the MIN
    # (start >= MIN[0] and end <= MIN[1]) receives a non-zero score,
    # otherwise zero.
    # self = key mention, other = sys mention
    def craft_partial_match_score(self, other):
        # Only supported for the UA format yet.
        return NotImplemented

    # self = key mention, other = sys mention
    def head_match_score(self, other):
        # Only supported for the CorefUD format yet.
        return NotImplemented

    def zero_dependent_match_score(self, other):
        return NotImplemented

    def _exact_match_hash(self):
        # Consistent with _exact_match: the span alone.
        return hash(frozenset(self._words))

    def _super_exact_match_hash(self):
        # Consistent with _super_exact_match: span plus min set.
        return hash((frozenset(self._words), frozenset(self._minset)))
Oops, something went wrong.