Source code for nordlys.logic.fusion.fusion_scorer

"""
Fusion Scorer
=============

Abstract class for fusion-based scoring.

:Authors: Shuo Zhang, Krisztian Balog, Dario Garigliotti
"""

from nordlys.core.retrieval.retrieval import Retrieval

class FusionScorer(object):
    """Abstract class for any fusion-based method.

    The base class stores the configuration shared by fusion scorers and
    provides helpers for loading queries and scoring them in bulk.
    ``score_query`` and ``load_associations`` are placeholders here and are
    expected to be overridden by concrete scorers.
    """

    # Association mode identifiers. They are not referenced by the base
    # class itself; presumably concrete scorers use them to select how
    # document-object associations are weighted -- confirm against subclasses.
    ASSOC_MODE_BINARY = 1
    ASSOC_MODE_UNIFORM = 2

    def __init__(self, index_name, association_file=None, run_id="fusion"):
        """
        :param index_name: name of index
        :param association_file: association file (may be None)
        :param run_id: run identifier passed to ``write_trec_format`` when
            results are written by :meth:`score_queries`
        """
        self._index_name = index_name
        self.association_file = association_file
        # Both association maps start empty; the base class never fills them.
        self.assoc_obj = dict()
        self.assoc_doc = dict()
        self.run_id = run_id

    def load_associations(self):
        """Loads the document-object associations.

        The base implementation is a no-op, whether or not an association
        file was configured.
        """
        # file format: documentId objectId per line
        if self.association_file is not None:
            # Placeholder branch: loading logic is left to concrete scorers.
            pass

    def score_query(self, query, assoc_fun=None):
        """Scores a single query.

        The base implementation does nothing and returns None.

        :param query: query to score
        :param assoc_fun: unused by the base implementation
        """
        pass

    def score_queries(self, queries, output_file):
        """Scores all queries and dumps the results into an output file.

        Queries are processed in sorted order of their IDs. Each result
        object returned by :meth:`score_query` must provide
        ``write_trec_format(query_id, run_id, out)``.

        :param queries: query dictionary {queryID: query}
        :param output_file: path of the file to write (overwritten)
        """
        # The context manager closes the file even if scoring or writing
        # raises part-way through.
        with open(output_file, "w") as out:
            for query_id in sorted(queries):
                pqo = self.score_query(queries[query_id])
                pqo.write_trec_format(query_id, self.run_id, out)

    def load_queries(self, query_file):
        """Loads the query file.

        Each line is split on whitespace: the first token is the query ID
        and the remaining tokens are the query terms. Blank lines are
        skipped. If a query ID occurs more than once, the last occurrence
        wins.

        :param query_file: path of the query file
        :return: query dictionary {queryID:query([term1,term2,...])}
        """
        queries = {}
        with open(query_file, "r") as f:
            for line in f:
                tokens = line.split()
                if not tokens:
                    # Blank or whitespace-only line: there is no query ID.
                    continue
                queries[tokens[0]] = tokens[1:]
        return queries
# def parse(self, text): # stopwords = [ # "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", # "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", # "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"] # terms = [] # # Replace specific characters with space # chars = ["'", ".", ":", ",", "/", "(", ")", "-", "+"] # for ch in chars: # if ch in text: # text = text.replace(ch, " ") # # Tokenization # for term in text.split(): # default behavior of the split is to split on one or more whitespaces # # Lowercasing # term = term.lower() # # Stopword removal # if term in stopwords: # continue # terms.append(term) # return terms