Introducing ROUGE: A full Python Implementation of the ROUGE Metric
We provide a useful and fast module for ROUGE scoring, as well as a command-line tool to use it directly from the shell.
Showing 15 changed files with 842 additions and 0 deletions.
.gitignore
@@ -1,3 +1,6 @@
# swap files
*.swp

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
README.md

@@ -0,0 +1,123 @@
# Rouge
*A full Python library for the ROUGE metric [(paper)](http://www.aclweb.org/anthology/W04-1013).*
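For a quick sense of what the scores below mean: ROUGE-N measures n-gram overlap between a hypothesis and a reference, reported as recall, precision, and F-measure. Here is a minimal ROUGE-1 sketch for intuition only (whitespace tokenization assumed; this is not the library's actual implementation, which lives in `rouge_score`):

```python
from collections import Counter

def rouge_1(hyp, ref):
    """Count-clipped unigram overlap between two whitespace-tokenized texts."""
    hyp_counts, ref_counts = Counter(hyp.split()), Counter(ref.split())
    overlap = sum((hyp_counts & ref_counts).values())   # clipped matches
    p = overlap / max(sum(hyp_counts.values()), 1)      # precision: vs. hypothesis length
    r = overlap / max(sum(ref_counts.values()), 1)      # recall: vs. reference length
    f = 2 * p * r / (p + r) if p + r else 0.0           # F-measure
    return {"f": f, "p": p, "r": r}
```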
## Quickstart

#### Clone & Install

```shell
git clone https://github.com/pltrdy/pyrouge
cd pyrouge
sudo python3 setup.py install
```

or from pip:

```shell
sudo pip3 install rouge
```
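A quick way to check the install (this uses the `__version__` defined in `rouge/__init__.py` below):

```python
# Quick install check
import rouge
print(rouge.__version__)  # -> "0.2"
```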
#### Use it from the shell (JSON Output)

```shell
$ rouge -h
usage: rouge [-h] [-f] [-a] hypothesis reference

Rouge Metric Calculator

positional arguments:
  hypothesis  Text or file path
  reference   Text or file path

optional arguments:
  -h, --help  show this help message and exit
  -f, --file  File mode
  -a, --avg   Average mode
```
e.g.

```shell
# Single sentence
rouge "transcript is a written version of each day 's cnn student" \
      "this page includes the show transcript use the transcript to help students with"

# Scoring two files (line by line)
rouge -f ./tests/hyp.txt ./ref.txt
```
#### As a library

###### Score 1 sentence

```python
from rouge import Rouge

hypothesis = "the #### transcript is a written version of each day 's cnn student news program use this transcript to help students with reading comprehension and vocabulary use the weekly newsquiz to test your knowledge of stories you saw on cnn student news"

reference = "this page includes the show transcript use the transcript to help students with reading comprehension and vocabulary at the bottom of the page , comment for a chance to be mentioned on cnn student news . you must be a teacher or a student age # # or older to request a mention on the cnn student news roll call . the weekly newsquiz tests students ' knowledge of events in the news"

rouge = Rouge()
scores = rouge.get_scores(hypothesis, reference)
```
*Output:*

```json
{
  "rouge-1": {
    "f": 0.5238095189484127,
    "p": 0.6285714285714286,
    "r": 0.4489795918367347
  },
  "rouge-2": {
    "f": 0.27027026566025497,
    "p": 0.375,
    "r": 0.2112676056338028
  },
  "rouge-l": {
    "f": 0.28711800978275975,
    "p": 0.4418604651162791,
    "r": 0.25675675675675674
  }
}
```
###### Score multiple sentences

```python
import json
from rouge import Rouge

# Load some sentences
with open('./tests/data.json') as f:
    data = json.load(f)

hyps, refs = map(list, zip(*[[d['hyp'], d['ref']] for d in data]))
rouge = Rouge()
scores = rouge.get_scores(hyps, refs)
# or
scores = rouge.get_scores(hyps, refs, avg=True)
```
*Output (`avg=False`)*: a list of `n` dicts:

```
{"rouge-1": {"f": _, "p": _, "r": _}, "rouge-2": { .. }, "rouge-l": { ... }}
```

*Output (`avg=True`)*: a single dict with average values:

```
{"rouge-1": {"f": _, "p": _, "r": _}, "rouge-2": { .. }, "rouge-l": { ... }}
```
###### Score two files (line by line)

Given two files `hyp_path` and `ref_path` with the same number of lines (`n`), compute a score for each pair of lines, or the average over the whole files.

```python
from rouge import FilesRouge

files_rouge = FilesRouge(hyp_path, ref_path)
scores = files_rouge.get_scores()
# or
scores = files_rouge.get_scores(avg=True)
```

**Note:** to avoid consuming too much memory, pass `batch_lines=l` to `FilesRouge`; the files are then read only `l` lines at a time (otherwise the whole files are loaded at once).
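For example, a sketch of chunked scoring (the file paths are illustrative, not files shipped with the repo):

```python
from rouge import FilesRouge

# Score two large files 100 lines at a time instead of loading them whole
files_rouge = FilesRouge("hyp.txt", "ref.txt", batch_lines=100)
scores = files_rouge.get_scores(avg=True)
```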
Binary file not shown.
Empty file.
@@ -0,0 +1,36 @@
#!/usr/bin/env python3
import argparse
import json
import os

from rouge import Rouge, FilesRouge


def main():
    parser = argparse.ArgumentParser(description='Rouge Metric Calculator')
    parser.add_argument('-f', '--file', help="File mode", action='store_true')
    parser.add_argument('-a', '--avg', help="Average mode", action='store_true')
    parser.add_argument('hypothesis', type=str, help='Text or file path')
    parser.add_argument('reference', type=str, help='Text or file path')

    args = parser.parse_args()
    if args.file:
        # File mode: both arguments are paths, scored line by line
        hyp, ref = args.hypothesis, args.reference
        assert os.path.isfile(hyp)
        assert os.path.isfile(ref)

        files_rouge = FilesRouge(hyp, ref)
        scores = files_rouge.get_scores(avg=args.avg)

        print(json.dumps(scores, indent=2))
    else:
        # Text mode: both arguments are the sentences themselves
        hyp, ref = args.hypothesis, args.reference
        assert type(hyp) == str
        assert type(ref) == str

        rouge = Rouge()
        scores = rouge.get_scores(hyp, ref, avg=args.avg)

        print(json.dumps(scores, indent=2))


if __name__ == "__main__":
    main()
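This diff does not show how the `rouge` shell command from the README gets registered at install time, but a console entry point along these lines would do it (a sketch only; the module path `rouge_cmd` and the other metadata are assumptions, not taken from this commit):

```python
# setup.py -- hypothetical sketch, not the file from this commit
from setuptools import setup, find_packages

setup(
    name="rouge",
    version="0.2",
    packages=find_packages(),
    entry_points={
        "console_scripts": [
            # expose the main() above as a `rouge` command; module path assumed
            "rouge = rouge_cmd:main",
        ],
    },
)
```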
rouge/__init__.py

@@ -0,0 +1,3 @@
from rouge.rouge import FilesRouge, Rouge

__version__ = "0.2"
rouge/rouge.py

@@ -0,0 +1,135 @@
# -*- coding: utf-8 -*-
import os

import numpy as np

import rouge.rouge_score as rouge_score


class FilesRouge:
    def __init__(self, hyp_path, ref_path, metrics=None, stats=None,
                 batch_lines=None):
        assert os.path.isfile(hyp_path)
        assert os.path.isfile(ref_path)

        self.rouge = Rouge(metrics=metrics, stats=stats)

        def line_count(path):
            count = 0
            for line in open(path):
                count += 1
            return count

        hyp_lc = line_count(hyp_path)
        ref_lc = line_count(ref_path)
        assert hyp_lc == ref_lc

        assert batch_lines is None or type(batch_lines) == int

        self.hyp_path = hyp_path
        self.ref_path = ref_path
        self.hyp_lc = hyp_lc
        self.batch_lines = batch_lines

    def get_scores(self, avg=False):
        """Calculate ROUGE scores between each pair of lines
        (hyp_file[i], ref_file[i]).

        Args:
          * avg (False): whether to return average scores or a list

        If `batch_lines` was set on the constructor, the files are
        processed `batch_lines` lines at a time, which uses less memory.
        """
        batch_lines = self.batch_lines
        hyp_path, ref_path = self.hyp_path, self.ref_path
        hyp_lc = self.hyp_lc

        if batch_lines is None:
            # Load both files entirely and score them in one pass
            hyps = [line[:-1] for line in open(hyp_path).readlines()]
            refs = [line[:-1] for line in open(ref_path).readlines()]

            return self.rouge.get_scores(hyps, refs, avg=avg)

        if batch_lines > hyp_lc:
            batch_lines = hyp_lc

        if avg:
            # Accumulate per-batch averages weighted by batch size,
            # then divide by the total line count at the end
            sc = None

            def update_scores(s, h, r):
                batch = self.rouge.get_scores(h, r, avg=True)
                if s is None:
                    return {m: {st: v * len(h) for st, v in d.items()}
                            for m, d in batch.items()}
                for m, d in batch.items():
                    for st, v in d.items():
                        s[m][st] += v * len(h)
                return s
        else:
            sc = []

            def update_scores(s, h, r):
                return s + self.rouge.get_scores(h, r)

        hyp_file = open(hyp_path)
        ref_file = open(ref_path)

        batch_hyp = []
        batch_ref = []

        for _ in range(hyp_lc):
            batch_hyp.append(hyp_file.readline()[:-1])
            batch_ref.append(ref_file.readline()[:-1])

            if len(batch_hyp) == batch_lines:
                sc = update_scores(sc, batch_hyp, batch_ref)
                batch_hyp = []
                batch_ref = []

        # Score the last, possibly smaller, batch
        if batch_hyp:
            sc = update_scores(sc, batch_hyp, batch_ref)

        if avg:
            return {m: {st: v / hyp_lc for st, v in d.items()}
                    for m, d in sc.items()}
        return sc


class Rouge:
    DEFAULT_METRICS = ["rouge-1", "rouge-2", "rouge-l"]
    AVAILABLE_METRICS = {
        "rouge-1": lambda hyp, ref: rouge_score.rouge_n([hyp], [ref], 1),
        "rouge-2": lambda hyp, ref: rouge_score.rouge_n([hyp], [ref], 2),
        "rouge-l": lambda hyp, ref:
            rouge_score.rouge_l_sentence_level([hyp], [ref]),
    }

    DEFAULT_STATS = ["f", "p", "r"]
    # Index of each stat in the (f, p, r) tuples returned by rouge_score
    AVAILABLE_STATS = {"f": 0, "p": 1, "r": 2}

    def __init__(self, metrics=None, stats=None):
        self.metrics = metrics if metrics is not None else Rouge.DEFAULT_METRICS
        self.stats = stats if stats is not None else Rouge.DEFAULT_STATS

        for m in self.metrics:
            if m not in Rouge.AVAILABLE_METRICS:
                raise ValueError("Unknown metric '%s'" % m)

        for s in self.stats:
            if s not in Rouge.AVAILABLE_STATS:
                raise ValueError("Unknown stat '%s'" % s)

    def get_scores(self, hyps, refs, avg=False):
        if type(hyps) == str:
            hyps, refs = [hyps], [refs]

        assert type(hyps) == type(refs)
        assert len(hyps) == len(refs)

        if not avg:
            return self._get_scores(hyps, refs)
        return self._get_avg_scores(hyps, refs)

    def _get_scores(self, hyps, refs):
        scores = []
        for hyp, ref in zip(hyps, refs):
            sen_score = {}
            for m in self.metrics:
                fn = Rouge.AVAILABLE_METRICS[m]
                sc = fn(hyp, ref)
                sen_score[m] = {s: sc[Rouge.AVAILABLE_STATS[s]]
                                for s in self.stats}
            scores.append(sen_score)
        return scores

    def _get_avg_scores(self, hyps, refs):
        scores = {}
        for m in self.metrics:
            fn = Rouge.AVAILABLE_METRICS[m]
            sc = [fn(hyp, ref) for hyp, ref in zip(hyps, refs)]
            sc = [[sen_sc[Rouge.AVAILABLE_STATS[s]] for s in self.stats]
                  for sen_sc in sc]
            # Average each stat column over all sentences
            scores[m] = {s: st for s, st
                         in zip(self.stats, tuple(map(np.mean, zip(*sc))))}
        return scores
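Since the constructor validates `metrics` and `stats` against the `AVAILABLE_*` tables above, the scorer can be narrowed down; for instance (example sentences are illustrative):

```python
from rouge import Rouge

# Report only the ROUGE-L F-measure
rouge = Rouge(metrics=["rouge-l"], stats=["f"])
scores = rouge.get_scores("the cat sat on the mat",
                          "the cat was sitting on the mat")
# -> [{"rouge-l": {"f": ...}}]
```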