# Source code for nordlys.core.eval.trec_eval

"""
Trec Evaluation
===============

Wrapper for trec_eval.

:Authors: Dario Garigliotti, Shuo Zhang
"""

from subprocess import Popen, PIPE
from shlex import split

from nordlys.config import TREC_EVAL


class TrecEval(object):
    """Holds evaluation results obtained using trec_eval.

    Results are stored per query as ``results[query_id][metric] = score``,
    with metric names lower-cased. They are populated either by running
    trec_eval (:meth:`evaluate`) or by reading a previously saved trec_eval
    output file (:meth:`load_results`).
    """

    __TREC_EVAL_FLAGS = "-c -m all_trec -q"

    def __init__(self):
        self.__results = None  # results[query_id][metric] = score

    @staticmethod
    def __parse_results(output):
        """Parses trec_eval output text into a nested results dictionary.

        Each usable line has exactly three whitespace-separated fields:
        metric, query ID and score. Lines with a different number of fields,
        summary lines (query ID "all") and lines whose score is not a number
        are skipped.

        :param output: trec_eval output as a single string
        :return: dictionary of the form results[query_id][metric] = score
        """
        results = {}
        for line in output.splitlines():
            fields = line.split()
            if len(fields) != 3:
                # blank or bad-formed line; unpacking it would raise
                continue
            metric, query_id, score = fields
            if query_id == "all":  # ignore "all" lines
                continue
            try:
                score = float(score)
            except ValueError:
                # e.g. some bad-formed lines with a "score" like '----------'
                continue
            results.setdefault(query_id, {})[metric.lower()] = score
        return results

    def __eval_proc(self, qrels_file, run_file, eval_file=None):
        """Executes the evaluation process call and optionally saves the
        output to a file.

        :param qrels_file: name of qrels file
        :param run_file: name of run file
        :param eval_file: name of evaluation output file
        :return: (output, err) tuple; output is the decoded stdout of the
            process, err is always None because stderr is not captured
        """
        # Only the configured command and flags are shell-split; the file
        # names are passed as single arguments so that paths containing
        # whitespace or quote characters are not broken apart.
        cmd = split(TREC_EVAL) + split(self.__TREC_EVAL_FLAGS)
        cmd += [qrels_file, run_file]
        p = Popen(cmd, stdout=PIPE)
        output, err = p.communicate()
        output = output.decode('utf8')
        if eval_file is not None:
            with open(eval_file, "w", encoding="utf8") as f_out:
                f_out.write(output)
        return output, err

    def load_results(self, eval_file):
        """Loads results from an existing evaluation file.

        Any previously held results are discarded.

        :param eval_file: name of evaluation file
        :raises OSError: if the file cannot be opened
        """
        self.__results = {}
        with open(eval_file, "r", encoding="utf8") as f_in:
            self.__results = self.__parse_results(f_in.read())

    def evaluate(self, qrels_file, run_file, eval_file=None):
        """Evaluates a runfile using trec_eval. Optionally writes evaluation
        output to file.

        Any previously held results are discarded.

        :param qrels_file: name of qrels file
        :param run_file: name of run file
        :param eval_file: name of evaluation output file
        """
        self.__results = {}
        output, _ = self.__eval_proc(qrels_file, run_file,
                                     eval_file=eval_file)
        self.__results = self.__parse_results(output)

    def get_query_ids(self):
        """Returns the set of queryIDs for which we have results.

        :return: view of the query IDs (empty if nothing has been evaluated
            or loaded yet)
        """
        return (self.__results or {}).keys()

    def get_score(self, query_id, metric):
        """Returns the score for a given queryID and metric.

        :param query_id: queryID
        :param metric: metric (matched case-insensitively)
        :return: score (or None if not found)
        """
        results = self.__results or {}
        return results.get(query_id, {}).get(metric.lower(), None)