Introducing ROUGE: A full Python Implementation of the ROUGE Metric
We provide a useful and fast module for ROUGE scoring as well as a
command to use it directly from the shell.
pltrdy committed Mar 16, 2017
1 parent 55f2d66 commit 93d0f18
Showing 15 changed files with 842 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -1,3 +1,6 @@
# swap files
*.swp

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
123 changes: 123 additions & 0 deletions README.md
@@ -0,0 +1,123 @@
# Rouge
*A full Python library for the ROUGE metric [(paper)](http://www.aclweb.org/anthology/W04-1013).*

## Quickstart
#### Clone & Install
```shell
git clone https://github.com/pltrdy/rouge
cd rouge
sudo python3 setup.py install
```
or from pip:
```
sudo pip3 install rouge
```
#### Use it from the shell (JSON Output)
```
$ rouge -h
usage: rouge [-h] [-f] [-a] hypothesis reference

Rouge Metric Calculator

positional arguments:
  hypothesis  Text or file path
  reference   Text or file path

optional arguments:
  -h, --help  show this help message and exit
  -f, --file  File mode
  -a, --avg   Average mode
```

e.g.

```shell
# Single Sentence
rouge "transcript is a written version of each day 's cnn student" \
""this page includes the show transcript use the transcript to help students with"
# Scoring using two files (line by line)
rouge -f ./tests/hyp.txt ./ref.txt
```
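
The two flags combine; for instance, to print the average over the whole file pair rather than one score per line (a sketch reusing the paths above):
```shell
# Average ROUGE over every line pair of the two files
rouge -f -a ./tests/hyp.txt ./ref.txt
```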
#### As a library
###### Score 1 sentence
```python
from rouge import Rouge
hypothesis = "the #### transcript is a written version of each day 's cnn student news program use this transcript to he lp students with reading comprehension and vocabulary use the weekly newsquiz to test your knowledge of storie s you saw on cnn student news"

reference = "this page includes the show transcript use the transcript to help students with reading comprehension and vocabulary at the bottom of the page , comment for a chance to be mentioned on cnn student news . you must be a teac her or a student age # # or older to request a mention on the cnn student news roll call . the weekly newsquiz tests students ' knowledge of even ts in the news"

rouge = Rouge()
scores = rouge.get_scores(hypothesis, reference)
```

*Output:*

```json
{
"rouge-1": {
"f": 0.5238095189484127,
"p": 0.6285714285714286,
"r": 0.4489795918367347
},
"rouge-2": {
"f": 0.27027026566025497,
"p": 0.375,
"r": 0.2112676056338028
},
"rouge-l": {
"f": 0.28711800978275975,
"p": 0.4418604651162791,
"r": 0.25675675675675674
}
}
```

###### Score multiple sentences
```python
import json
from rouge import Rouge

# Load some sentences
with open('./tests/data.json') as f:
data = json.load(f)

hyps, refs = map(list, zip(*[[d['hyp'], d['ref']] for d in data]))
rouge = Rouge()
scores = rouge.get_scores(hyps, refs)
# or
scores = rouge.get_scores(hyps, refs, avg=True)
```

*Output (`avg=False`)*: a list of `n` dicts:

```
{"rouge-1": {"f": _, "p": _, "r": _}, "rouge-2" : { .. }, "rouge-3": { ... }}
```


*Output (`avg=True`)*: a single dict with average values:

```
{"rouge-1": {"f": _, "p": _, "r": _}, "rouge-2" : { ..     }, "rouge-3": { ... }}
```

###### Score two files (line by line)
Given two files `hyp_path` and `ref_path` with the same number (`n`) of lines, calculate the score for each pair of lines, or the average over the whole files.

```python
from rouge import FilesRouge

files_rouge = FilesRouge(hyp_path, ref_path)
scores = files_rouge.get_scores()
# or
scores = files_rouge.get_scores(avg=True)
```

**Note** that you can avoid consuming too much memory by passing `batch_lines=l`; the script then reads only `l` lines at a time (otherwise it loads both files entirely).
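
For instance, a minimal sketch (paths reuse the test files above; the batch size of 128 is arbitrary):
```python
from rouge import FilesRouge

# Read 128 lines per batch instead of loading both files at once
files_rouge = FilesRouge('./tests/hyp.txt', './ref.txt', batch_lines=128)
scores = files_rouge.get_scores(avg=True)
```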
Binary file added bin/.rouge_cmd.py.swp
Empty file added bin/__init__.py
36 changes: 36 additions & 0 deletions bin/rouge_cmd.py
@@ -0,0 +1,36 @@
#!/usr/bin/env python3
import argparse
import json
import os

from rouge import Rouge, FilesRouge


def main():
    parser = argparse.ArgumentParser(description='Rouge Metric Calculator')
    parser.add_argument('-f', '--file', help="File mode", action='store_true')
    parser.add_argument('-a', '--avg', help="Average mode", action='store_true')
    parser.add_argument('hypothesis', type=str, help='Text or file path')
    parser.add_argument('reference', type=str, help='Text or file path')

    args = parser.parse_args()
    hyp, ref = args.hypothesis, args.reference

    if args.file:
        # File mode: score two files line by line
        assert os.path.isfile(hyp)
        assert os.path.isfile(ref)

        files_rouge = FilesRouge(hyp, ref)
        scores = files_rouge.get_scores(avg=args.avg)
    else:
        # Text mode: score two sentences passed directly as arguments
        rouge = Rouge()
        scores = rouge.get_scores(hyp, ref, avg=args.avg)

    print(json.dumps(scores, indent=2))


if __name__ == "__main__":
    main()
3 changes: 3 additions & 0 deletions rouge/__init__.py
@@ -0,0 +1,3 @@
from rouge.rouge import FilesRouge, Rouge

__version__ = "0.2"
135 changes: 135 additions & 0 deletions rouge/rouge.py
@@ -0,0 +1,135 @@
# -*- coding: utf-8 -*-
import rouge.rouge_score as rouge_score
import os
import numpy as np

class FilesRouge:
    def __init__(self, hyp_path, ref_path, metrics=None, stats=None,
                 batch_lines=None):
        assert os.path.isfile(hyp_path)
        assert os.path.isfile(ref_path)

        self.rouge = Rouge(metrics=metrics, stats=stats)

        def line_count(path):
            # Count lines without loading the whole file into memory
            count = 0
            for line in open(path):
                count += 1
            return count

        hyp_lc = line_count(hyp_path)
        ref_lc = line_count(ref_path)
        assert hyp_lc == ref_lc

        assert batch_lines is None or isinstance(batch_lines, int)

        self.hyp_path = hyp_path
        self.ref_path = ref_path
        self.line_count = hyp_lc
        self.batch_lines = batch_lines

    def get_scores(self, avg=False):
        """Calculate ROUGE scores between each pair of lines
        (hyp_file[i], ref_file[i]).

        Args:
          * avg (False): whether to return an average score or a
            list of per-line scores

        If `batch_lines` was set, only that many lines are read at
        a time (uses less memory); otherwise both files are loaded
        entirely.
        """
        batch_lines = self.batch_lines
        hyp_path, ref_path = self.hyp_path, self.ref_path
        line_count = self.line_count

        if batch_lines is None:
            hyps = [line[:-1] for line in open(hyp_path).readlines()]
            refs = [line[:-1] for line in open(ref_path).readlines()]
            return self.rouge.get_scores(hyps, refs, avg=avg)

        if batch_lines > line_count:
            batch_lines = line_count

        if avg:
            # Accumulate per-metric/per-stat sums, weighted by batch size
            sc = {m: {s: 0.0 for s in self.rouge.stats}
                  for m in self.rouge.metrics}

            def update_scores(sc, h, r):
                batch_avg = self.rouge.get_scores(h, r, avg=True)
                for m in sc:
                    for s in sc[m]:
                        sc[m][s] += batch_avg[m][s] * len(h)
                return sc
        else:
            sc = []

            def update_scores(sc, h, r):
                return sc + self.rouge.get_scores(h, r)

        batch_hyp = []
        batch_ref = []

        with open(hyp_path) as hyp_file, open(ref_path) as ref_file:
            for hyp_line, ref_line in zip(hyp_file, ref_file):
                batch_hyp.append(hyp_line[:-1])
                batch_ref.append(ref_line[:-1])

                if len(batch_hyp) == batch_lines:
                    sc = update_scores(sc, batch_hyp, batch_ref)
                    batch_hyp = []
                    batch_ref = []

            # Flush the last, possibly partial, batch
            if batch_hyp:
                sc = update_scores(sc, batch_hyp, batch_ref)

        if avg:
            # Turn weighted sums into means over all lines
            return {m: {s: sc[m][s] / line_count for s in sc[m]}
                    for m in sc}
        return sc


class Rouge:
    DEFAULT_METRICS = ["rouge-1", "rouge-2", "rouge-l"]
    AVAILABLE_METRICS = {
        "rouge-1": lambda hyp, ref: rouge_score.rouge_n([hyp], [ref], 1),
        "rouge-2": lambda hyp, ref: rouge_score.rouge_n([hyp], [ref], 2),
        "rouge-l": lambda hyp, ref: rouge_score.rouge_l_sentence_level([hyp], [ref]),
    }

    DEFAULT_STATS = ["f", "p", "r"]
    AVAILABLE_STATS = {"f": 0, "p": 1, "r": 2}

    def __init__(self, metrics=None, stats=None):
        self.metrics = metrics if metrics is not None else Rouge.DEFAULT_METRICS
        self.stats = stats if stats is not None else Rouge.DEFAULT_STATS

        for m in self.metrics:
            if m not in Rouge.AVAILABLE_METRICS:
                raise ValueError("Unknown metric '%s'" % m)

        for s in self.stats:
            if s not in Rouge.AVAILABLE_STATS:
                raise ValueError("Unknown stat '%s'" % s)

    def get_scores(self, hyps, refs, avg=False):
        if isinstance(hyps, str):
            hyps, refs = [hyps], [refs]

        assert type(hyps) == type(refs)
        assert len(hyps) == len(refs)

        if not avg:
            return self._get_scores(hyps, refs)
        return self._get_avg_scores(hyps, refs)

    def _get_scores(self, hyps, refs):
        scores = []
        for hyp, ref in zip(hyps, refs):
            sen_score = {}
            for m in self.metrics:
                fn = Rouge.AVAILABLE_METRICS[m]
                sc = fn(hyp, ref)
                sen_score[m] = {s: sc[Rouge.AVAILABLE_STATS[s]]
                                for s in self.stats}
            scores.append(sen_score)
        return scores

    def _get_avg_scores(self, hyps, refs):
        scores = {}
        for m in self.metrics:
            fn = Rouge.AVAILABLE_METRICS[m]
            # Per-sentence (f, p, r) tuples for this metric
            sc = [fn(hyp, ref) for hyp, ref in zip(hyps, refs)]
            sc = [[sen_sc[Rouge.AVAILABLE_STATS[s]] for s in self.stats]
                  for sen_sc in sc]
            # Average each requested stat over all sentences
            scores[m] = {s: st for s, st
                         in zip(self.stats, map(np.mean, zip(*sc)))}
        return scores
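
Since `__init__` validates `metrics` and `stats` against `AVAILABLE_METRICS` and `AVAILABLE_STATS`, the scorer can be restricted to a subset of both; a minimal usage sketch (the example sentences are arbitrary):
```python
from rouge import Rouge

# Compute only ROUGE-L, and report only the F-measure
rouge = Rouge(metrics=["rouge-l"], stats=["f"])
scores = rouge.get_scores("the cat sat on the mat",
                          "the cat is on the mat")
# -> [{'rouge-l': {'f': ...}}]
```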

