"""
Mention
=======
Class for entity mentions (used for entity linking)
- Generates all candidate entities for a mention
- Computes commonness for mention-entity pairs
"""
import sys
from pprint import pprint
from nordlys.logic.entity.entity import Entity
class Mention(object):
    """Entity mention used for entity linking.

    Collects the candidate entities of a mention from the FACC and DBpedia
    name lookups offered by the supplied entity object. When a commonness
    threshold is given, FACC counts are turned into commonness scores and
    FACC candidates scoring below the threshold are dropped.
    """

    def __init__(self, mention, entity, cmns_th=None):
        """Initializes the mention.

        :param mention: surface form of the mention; it is stored lowercased
        :param entity: object providing ``lookup_name_facc``,
            ``lookup_name_dbpedia`` and ``fb_to_dbp``
        :param cmns_th: commonness threshold; a falsy value (``None`` or 0)
            disables both the commonness computation and the filtering
        """
        self.__mention = mention.lower()
        self.__entity = entity
        self.__cmns_th = cmns_th

    def get_cand_ens(self):
        """Returns all candidate entities for the mention.

        Entities found by the DBpedia name lookup are always part of the
        result, even if their FACC commonness is below the threshold; those
        without any FACC match get the value 0.

        :return: {entity_id: score}, where score is the commonness when a
            threshold is set and the raw FACC value otherwise
        """
        facc_matches = self.__get_facc_matches(
            self.__entity.lookup_name_facc(self.__mention))
        if self.__cmns_th:
            cand_ens = self.__filter_uncommon_ens(facc_matches)
        else:
            cand_ens = facc_matches

        dbpedia_matches = self.__get_dbpedia_matches(
            self.__entity.lookup_name_dbpedia(self.__mention))
        for en_id in dbpedia_matches:
            # Look the score up in the unfiltered FACC matches, so a DBpedia
            # match keeps its score even when the threshold filtered it out.
            cand_ens[en_id] = facc_matches.get(en_id, 0)
        return cand_ens

    def __get_dbpedia_matches(self, matches):
        """Returns the set of entity IDs found in the DBpedia name lookup.

        :param matches: mapping {field: {entity_id: ...}}; the "_id" field is
            skipped
        :return: set of entity IDs
        """
        dbp_ens = set()
        for field, match in matches.items():
            if field == "_id":
                continue
            dbp_ens.update(match.keys())
        return dbp_ens

    def __get_facc_matches(self, matches):
        """Returns entities matching the mention according to FACC.

        - Computes commonness for each entity (only if a threshold is set)
        - Converts Freebase IDs to DBpedia IDs

        :param matches: lookup result; only its "facc12" entry is read
        :return: {dbpedia_id: value}; Freebase IDs for which ``fb_to_dbp``
            returns None are left out
        """
        facc_matches = matches.get("facc12", {})
        if self.__cmns_th:
            facc_matches = self.__get_commonness_scores(facc_matches)

        # converts Freebase IDs to DBpedia
        facc_ens = {}
        for entity_id, val in facc_matches.items():
            dbp_ids = self.__entity.fb_to_dbp(entity_id)
            if dbp_ids is None:
                continue
            for dbp_id in dbp_ids:
                facc_ens[dbp_id] = val
        return facc_ens

    def __get_commonness_scores(self, en_counts):
        """Computes the commonness score of all entities matching the mention.

        :param en_counts: dictionary {entity_id: count, ...}
        :return: commonness scores {entity_id: commonness, ...}
        """
        total_occurrences = sum(en_counts.values())
        if total_occurrences == 0:
            # Every count is zero (or there are no entities): score them all
            # 0.0 instead of dividing by zero.
            return {en: 0.0 for en in en_counts}
        return {en: count / total_occurrences
                for en, count in en_counts.items()}

    def __filter_uncommon_ens(self, en_cmns):
        """Filters out entities that are below the commonness threshold.

        :param en_cmns: dictionary {entity_id: commonness, ...}
        :return: new dictionary holding only the entities whose commonness is
            greater than or equal to the threshold
        """
        return {en: cmns for en, cmns in en_cmns.items()
                if cmns >= self.__cmns_th}
def main(args):
    """Prints the candidate entities of the mention given on the command line.

    Candidates are generated with a commonness threshold of 0.1.

    :param args: command-line arguments; the first item is the mention
    """
    if not args:
        # Fail with a usage message instead of an IndexError on args[0], and
        # do so before the Entity object is constructed.
        sys.exit("usage: {} <mention>".format(sys.argv[0]))
    entity = Entity()
    mention = Mention(args[0], entity, cmns_th=0.1)
    ens = mention.get_cand_ens()
    print(ens)
if __name__ == "__main__":
    # Run as a script: hand everything after the program name to main().
    cli_args = sys.argv[1:]
    main(cli_args)