Source code for nordlys.core.ml.instance

"""
Instance
========

Instance class.

Features:
    - This class supports different features for an instance.
    - The features, together with the id of the instance, will be used in machine learning algorithms.
    - All Features are stored in a dictionary, where keys are feature names (self.features).

Instance properties:
    - Properties are additional side information of an instance (e.g. query_id, entity_id, ...).
    - Properties are stored in a dictionary (self.properties).

This is the base instance class.
Specific types of instances can inherit from this class and add more properties to the base class.

:Author: Faegheh Hasibi
"""

import json
from nordlys.config import PLOGGER


class Instance(object):
    """A single machine-learning instance: an id with features, target, score and properties.

    Class attributes:
        - id: instance id (read-only property)
        - features: a dictionary of feature names and values
        - target: target id or class (defaults to the string "0")
        - score: score of the instance (defaults to the empty string)
        - properties: a dictionary of property names and values
    """

    def __init__(self, id, features=None, target="0", properties=None):
        """Creates an instance.

        :param id: instance id
        :param features: dictionary of feature names and values; stored by reference (not copied)
        :param target: target id or class
        :param properties: dictionary of property names and values; stored by reference (not copied)
        """
        # `id` shadows the builtin, but the name is part of the public signature
        # (keyword callers), so it is kept as is.
        self.__id = id
        self.__features = {} if features is None else features
        self.__properties = {} if properties is None else properties
        self.target = target
        self.score = ""

    @property
    def id(self):
        return self.__id

    @property
    def features(self):
        return self.__features

    @features.setter
    def features(self, f):
        self.__features = f

    @property
    def properties(self):
        return self.__properties

    @properties.setter
    def properties(self, p):
        self.__properties = p

    def add_feature(self, feature, value):
        """Adds a new feature to the features (overwrites an existing one with the same name).

        :param feature: (string), feature name
        :param value: feature value
        """
        self.__features[feature] = value

    def get_feature(self, feature):
        """Returns the value of a given feature.

        :param feature: feature name
        :return: the feature value, or None if the feature is not present
        """
        return self.__features.get(feature, None)

    def add_property(self, property, value):
        """Adds a new property to the properties (overwrites an existing one with the same name).

        :param property: (string), property name
        :param value: property value
        """
        self.__properties[property] = value

    def get_property(self, property):
        """Returns the value of a given property.

        :param property: property name
        :return: the property value, or None if the property is not present
        """
        return self.__properties.get(property, None)

    @classmethod
    def from_json(cls, ins_id, fields):
        """Reads an instance in JSON format and generates Instance.

        Recognized keys of ``fields`` are "target", "score", "properties" and
        "features"; any other key is ignored. Feature values are converted to float.

        :param ins_id: instance id
        :param fields: A dictionary of fields
        :return: (ml.Instance)
        """
        instance = cls(ins_id)
        for key, value in fields.items():
            if key == "target":
                instance.target = value
            elif key == "score":
                instance.score = value
            elif key == "properties":
                instance.__properties = value
            elif key == "features":
                for ftr_name, ftr_val in value.items():
                    instance.__features[ftr_name] = float(ftr_val)
        return instance

    def to_json(self, file_name=None):
        """Converts instance to a JSON-serializable dictionary, optionally writing it to a file.

        :param file_name: (string) if given, the dictionary is also dumped to this file
        :return: dictionary of the form {id: {"target", "score", "features", "properties"}};
            the nested features/properties dictionaries are the instance's own, not copies.
        """
        json_ins = {
            self.__id: {
                "target": self.target,
                "score": self.score,
                "features": self.__features,
                "properties": self.__properties,
            }
        }
        if file_name is not None:
            PLOGGER.info("writing instance \"" + str(self.__id) + "\" to " + file_name + "...")
            # Context manager guarantees the file is flushed and closed,
            # even if serialization raises.
            with open(file_name, "w") as out:
                json.dump(json_ins, out, indent=4)
        return json_ins

    def to_str(self, feature_set=None):
        """Converts instance to a tab separated string.

        :param feature_set: features to be included in the output format;
            defaults to all features, sorted by name
        :return: (string) tab separated string (with a trailing tab):
            ins_id target score ftr_1:val ... ftr_n:val prop_1:val ... prop_m:val
        :raises KeyError: if a requested feature is not present in the instance
        """
        if feature_set is None:
            feature_set = sorted(self.__features.keys())

        parts = [str(self.__id), str(self.target), str(self.score)]
        parts.extend(str(feature) + ":" + str(self.__features[feature]) for feature in feature_set)
        parts.extend(str(field) + ":" + str(self.__properties[field])
                     for field in sorted(self.__properties.keys()))
        # Every field, including the last one, is followed by a tab.
        return "\t".join(parts) + "\t"

    def to_libsvm(self, features, qid_prop=None):
        """Converts instance to the Libsvm format.

        - RankLib format: <target> qid:<qid> <feature>:<value> ... # <info>
        - Example: 3 qid:1 1:1 2:1 3:0 4:0.2 5:0 # 1A

        NOTE: the property used for qid(qid_prop) should hold integers

        :param features: the list of features that should be in the output;
            they are numbered 1..n in the given order
        :param qid_prop: property to be used as qid; the instance id is used if None
        :return: str, instance in the rankLib format.
        :raises KeyError: if a requested feature is not present in the instance
        """
        # Sets qid
        qid = self.__id if qid_prop is None else self.get_property(qid_prop)

        # str() on the target so non-string targets (e.g. integer labels) work,
        # consistently with to_str().
        out = str(self.target) + " qid:" + str(qid) + " "
        out += "".join(str(feature_id) + ":" + str(self.__features[feature]) + " "
                       for feature_id, feature in enumerate(features, 1))
        out += " # " + str(self.__id)
        return out
def main():
    """Builds a small sample instance, writes it as JSON to a hard-coded path and logs it."""
    sample_features = {"f1": "0.5", "f2": "0.4"}
    sample = Instance(1, sample_features, "rel")

    # NOTE(review): these are plain attributes, not entries of `properties`, so
    # to_json() does not serialize them -- confirm whether add_property() was intended.
    sample.q_id = "q1"
    sample.q_content = "test query"

    out_path = "../../src/output/instance.txt"
    dumped = sample.to_json(out_path)
    PLOGGER.info(dumped)
# Run the demo only when the module is executed as a script, not on import.
if __name__ == "__main__":
    main()