Source code for nordlys.logic.query.query

"""
Query
=====

Class for representing a query.

TODO: add preprocessing (including stopword removal) using the Retrieval package

:Author: Faegheh Hasibi
"""

import re


class Query(object):
    """Representation of a query in both raw and pre-processed form.

    The pre-processed form is computed once, at construction time, and is
    exposed read-only through :attr:`query`.

    :param query: raw query string
    :param qid: query identifier (defaults to the empty string)
    """

    # Runs of anything other than ASCII letters, digits, "+" and "-" are
    # collapsed into a single space.
    _SPECIAL_CHARS = re.compile(r"[^A-Za-z0-9+-]+")
    # Upper-case boolean operators delimited by spaces on both sides. The
    # trailing space is only looked at (not consumed) so that it can serve as
    # the leading space of an immediately following operator.
    _BOOLEAN_OPERATORS = re.compile(r" (?:OR|AND)(?= )")

    def __init__(self, query, qid=""):
        self.__qid = qid
        self.__raw_query = query
        self.__query = None  # holds preprocessed query
        self.__preprocess()  # always preprocess the query

    @property
    def query(self):
        """Pre-processed (cleaned and lower-cased) query string."""
        return self.__query

    @property
    def raw_query(self):
        """Query string exactly as it was passed to the constructor."""
        return self.__raw_query

    @property
    def qid(self):
        """Query identifier."""
        return self.__qid

    def __preprocess(self):
        """Pre-process the query; removes some special chars.

        Special characters are replaced by spaces, upper-case ``OR``/``AND``
        operators surrounded by spaces are dropped, whitespace is normalized
        to single spaces, and the result is lower-cased.
        """
        # TODO make preprocessing (including stopwords removal) using Retrieval package
        input_str = self._SPECIAL_CHARS.sub(" ", self.raw_query)
        # Chained str.replace(" OR ", " ") calls consume the separating space
        # and therefore leave every second operator of an adjacent run behind
        # (e.g. "a OR OR b" -> "a or b"); the lookahead pattern removes all
        # of them.
        input_str = self._BOOLEAN_OPERATORS.sub("", input_str)
        # removing multiple (and leading/trailing) spaces
        self.__query = " ".join(input_str.split()).lower()

    def get_terms(self):
        """Gets query terms.

        :return: list of query terms
        """
        return self.query.split()

    def get_ngrams(self):
        """Finds all n-grams of the query.

        N-grams are ordered by increasing length and, within the same length,
        by their start position in the query.

        :return: list of n-grams
        """
        terms = self.get_terms()
        num_terms = len(terms)
        ngrams = []
        for size in range(1, num_terms + 1):  # number of words
            ngrams.extend(
                " ".join(terms[start:start + size])
                for start in range(num_terms - size + 1)  # start point
            )
        return ngrams