Initial commit.

cvangysel · Aug 14, 2017 · b8358d4 · b8358d4
commit b8358d4
Show file tree

Hide file tree

Showing 14 changed files with 1,189 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+build/
+*.pyc
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "trec_eval"]
+	path = trec_eval
+	url = https://github.com/usnistgov/trec_eval.git
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2017 Christophe Van Gysel
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,94 @@
+pytrec_eval
+===========
+
+pytrec\_eval is a Python interface to TREC's evaluation tool, [trec\_eval](https://github.com/usnistgov/trec_eval). It is an attempt to stop the cultivation of custom implementations of Information Retrieval evaluation measures for the Python programming language.
+
+Requirements
+------------
+
+The module was developed using Python 3.5. You need a Python distribution that comes with development headers. In addition to the default Python modules, [numpy](http://www.numpy.org) and [scipy](https://www.scipy.org) are required.
+
+Installation
+------------
+
+Installation is simple and should be relatively painless if your Python environment is functioning correctly (see below for FAQs).
+
+	# Clone the source.
+	git clone https://github.com/cvangysel/pytrec_eval.git
+
+	cd pytrec_eval
+
+	# Pull in the trec_eval source.
+	git submodule init
+	git submodule update
+
+	# Install dependencies.
+	pip install -r requirements.txt
+
+	# Install pytrec_eval.
+	python setup.py install
+
+Examples
+--------
+
+Check out the examples that simulate the standard [trec\_eval front-end](examples/trec_eval.py) and that compute [statistical significance](examples/statistical_significance.py) between two runs.
+
+To get a grasp of how simple the module is to use, check this out:
+
+	import pytrec_eval
+	import json
+
+	qrel = {
+	    'q1': {
+	        'd1': 0,
+	        'd2': 1,
+	        'd3': 0,
+	    },
+	    'q2': {
+	        'd2': 1,
+	        'd3': 1,
+	    },
+	}
+
+	run = {
+	    'q1': {
+	        'd1': 1.0,
+	        'd2': 0.0,
+	        'd3': 1.5,
+	    },
+	    'q2': {
+	        'd1': 1.5,
+	        'd2': 0.2,
+	        'd3': 0.5,
+	    }
+	}
+
+	evaluator = pytrec_eval.RelevanceEvaluator(
+	    qrel, {'map', 'ndcg'})
+
+	print(json.dumps(evaluator.evaluate(run), indent=1))
+
+The above snippet will return a data structure that contains the requested evaluation measures for queries `q1` and `q2`:
+
+	{
+	    'q1': {
+	        'ndcg': 0.5,
+	        'map': 0.3333333333333333
+	    },
+	    'q2': {
+	        'ndcg': 0.6934264036172708,
+	        'map': 0.5833333333333333
+	    }
+	}
+
+For more like this, see the example that uses [parametrized evaluation measures](examples/simple_cut.py).
+
+Frequently Asked Questions
+--------------------------
+
+Since the module's first release, no questions have been asked so frequently that they deserved a spot in this section.
+
+License
+-------
+
+pytrec\_eval is licensed under the [MIT license](LICENSE). Please note that [trec\_eval](https://github.com/usnistgov/trec_eval) is licensed separately. If you modify pytrec\_eval in any way, please link back to this repository.
diff --git a/examples/simple.py b/examples/simple.py
@@ -0,0 +1,34 @@
+"""A very simple example."""
+
+import pytrec_eval
+import json
+
+qrel = {
+    'q1': {
+        'd1': 0,
+        'd2': 1,
+        'd3': 0,
+    },
+    'q2': {
+        'd2': 1,
+        'd3': 1,
+    },
+}
+
+run = {
+    'q1': {
+        'd1': 1.0,
+        'd2': 0.0,
+        'd3': 1.5,
+    },
+    'q2': {
+        'd1': 1.5,
+        'd2': 0.2,
+        'd3': 0.5,
+    }
+}
+
+evaluator = pytrec_eval.RelevanceEvaluator(
+    qrel, {'map', 'ndcg'})
+
+print(json.dumps(evaluator.evaluate(run), indent=1))
diff --git a/examples/simple_cut.py b/examples/simple_cut.py
@@ -0,0 +1,34 @@
+"""A very simple example, but then with parametrized measures."""
+
+import pytrec_eval
+import json
+
+qrel = {
+    'q1': {
+        'd1': 0,
+        'd2': 1,
+        'd3': 0,
+    },
+    'q2': {
+        'd2': 1,
+        'd3': 1,
+    },
+}
+
+run = {
+    'q1': {
+        'd1': 1.0,
+        'd2': 0.0,
+        'd3': 1.5,
+    },
+    'q2': {
+        'd1': 1.5,
+        'd2': 0.2,
+        'd3': 0.5,
+    }
+}
+
+evaluator = pytrec_eval.RelevanceEvaluator(
+    qrel, {'map_cut', 'ndcg_cut'})
+
+print(json.dumps(evaluator.evaluate(run), indent=1))
diff --git a/examples/statistical_significance.py b/examples/statistical_significance.py
@@ -0,0 +1,54 @@
+"""Demonstrates how statistical significance tests can be run using pytrec_eval."""
+
+import argparse
+import os
+import scipy.stats
+import sys
+
+import pytrec_eval
+
+
+def main():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('qrel')
+    parser.add_argument('run', nargs=2)
+
+    # A bit too strict, as it does not allow for parametrized measures,
+    # but sufficient for the example.
+    parser.add_argument('--measure',
+                        choices=pytrec_eval.supported_measures,
+                        required=True)
+
+    args = parser.parse_args()
+
+    assert os.path.exists(args.qrel)
+    assert all(map(os.path.exists, args.run))
+
+    with open(args.qrel, 'r') as f_qrel:
+        qrel = pytrec_eval.parse_qrel(f_qrel)
+
+    with open(args.run[0], 'r') as f_run:
+        first_run = pytrec_eval.parse_run(f_run)
+
+    with open(args.run[1], 'r') as f_run:
+        second_run = pytrec_eval.parse_run(f_run)
+
+    evaluator = pytrec_eval.RelevanceEvaluator(
+        qrel, {args.measure})
+
+    first_results = evaluator.evaluate(first_run)
+    second_results = evaluator.evaluate(second_run)
+
+    query_ids = list(
+        set(first_results.keys()) & set(second_results.keys()))
+
+    first_scores = [
+        first_results[query_id][args.measure] for query_id in query_ids]
+    second_scores = [
+        second_results[query_id][args.measure] for query_id in query_ids]
+
+    print(scipy.stats.ttest_rel(first_scores, second_scores))
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/examples/trec_eval.py b/examples/trec_eval.py
@@ -0,0 +1,54 @@
+"""Approximately simulates trec_eval using pytrec_eval."""
+
+import argparse
+import os
+import sys
+
+import pytrec_eval
+
+
+def main():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('qrel')
+    parser.add_argument('run')
+
+    args = parser.parse_args()
+
+    assert os.path.exists(args.qrel)
+    assert os.path.exists(args.run)
+
+    with open(args.qrel, 'r') as f_qrel:
+        qrel = pytrec_eval.parse_qrel(f_qrel)
+
+    with open(args.run, 'r') as f_run:
+        run = pytrec_eval.parse_run(f_run)
+
+    evaluator = pytrec_eval.RelevanceEvaluator(
+        qrel, pytrec_eval.supported_measures)
+
+    results = evaluator.evaluate(run)
+
+    def print_line(measure, scope, value):
+        print('{:25s}{:8s}{:.4f}'.format(measure, scope, value))
+
+    for query_id, query_measures in sorted(results.items()):
+        for measure, value in sorted(query_measures.items()):
+            print_line(measure, query_id, value)
+
+    # Scope hack: use query_measures of last item in previous loop to
+    # figure out all unique measure names.
+    #
+    # TODO(cvangysel): add member to RelevanceEvaluator
+    #                  with a list of measure names.
+    for measure in sorted(query_measures.keys()):
+        print_line(
+            measure,
+            'all',
+            pytrec_eval.compute_aggregated_measure(
+                measure,
+                [query_measures[measure]
+                 for query_measures in results.values()]))
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/py/__init__.py b/py/__init__.py
@@ -0,0 +1,49 @@
+"""Module pytrec_eval."""
+
+import collections
+import numpy as np
+
+from pytrec_eval_ext import RelevanceEvaluator, supported_measures
+
+__all__ = [
+    'parse_run',
+    'parse_qrel',
+    'supported_measures',
+    'RelevanceEvaluator',
+]
+
+
+def parse_run(f_run):
+    run = collections.defaultdict(dict)
+
+    for line in f_run:
+        query_id, _, object_id, ranking, score, _ = line.strip().split()
+
+        assert object_id not in run[query_id]
+        run[query_id][object_id] = float(score)
+
+    return run
+
+
+def parse_qrel(f_qrel):
+    qrel = collections.defaultdict(dict)
+
+    for line in f_qrel:
+        query_id, _, object_id, relevance = line.strip().split()
+
+        assert object_id not in qrel[query_id]
+        qrel[query_id][object_id] = int(relevance)
+
+    return qrel
+
+
+def compute_aggregated_measure(measure, values):
+    if measure.startswith('num_'):
+        agg_fun = np.sum
+    elif measure.startswith('gm_'):
+        def agg_fun(values):
+            return np.exp(np.sum(values) / len(values))
+    else:
+        agg_fun = np.mean
+
+    return agg_fun(values)
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,2 @@
+numpy==1.11.1
+scipy==0.18.1