Source code for nordlys.core.utils.file_utils

"""
File Utils
==========

Utility methods for file handling.

:Authors: Krisztian Balog, Faegheh Hasibi
"""

import bz2
import gzip
import json
import sys
import os.path as op


class FileUtils(object):
    """Collection of static helpers for reading and writing files."""

    @staticmethod
    def open_file_by_type(file_name, mode="r"):
        """Opens a (possibly compressed) file and returns the handler.

        The opener is selected from the file name suffix: ``bz2`` -> bz2,
        ``gz`` -> gzip, anything else -> the built-in ``open``.

        For gzip and plain files, a mode without ``"b"`` is treated as text
        mode and decoded/encoded as UTF-8; a mode containing ``"b"`` is opened
        as binary without an encoding.

        NOTE: for bz2 files the mode is passed through unchanged, so the
        default ``"r"`` yields bytes (bz2's own default); pass ``"rt"``
        explicitly to get text.

        :param file_name: path of the file; a leading '~' is expanded
        :param mode: file mode, as for the built-in ``open`` (default "r")
        :return: handler to the file
        """
        file_name = op.expanduser(file_name)  # expands '~' to the absolute home dir
        is_binary = "b" in mode

        if file_name.endswith("bz2"):
            return bz2.open(file_name, mode)

        if file_name.endswith("gz"):
            if is_binary:
                return gzip.open(file_name, mode)
            # gzip treats "r"/"w"/"a" as *binary* and rejects `encoding` in
            # binary mode, so text mode has to be requested explicitly.
            text_mode = mode if "t" in mode else mode + "t"
            return gzip.open(file_name, text_mode, encoding="utf-8")

        if is_binary:
            # the built-in open() also rejects `encoding` in binary mode
            return open(file_name, mode)
        return open(file_name, mode, encoding="utf-8")

    @staticmethod
    def read_file_as_list(filename):
        """Reads in the non-empty lines of a textfile (which may be
        gzipped/bz2ed) and returns them as a list.

        Every line is stripped of surrounding whitespace; lines that are empty
        after stripping are dropped.

        :param filename: path of the file to read
        :return: list of stripped, non-empty lines
        """
        with FileUtils.open_file_by_type(filename) as f:
            return [l for l in (line.strip() for line in f) if l]

    @staticmethod
    def load_config(config):
        """Loads config file/dictionary.

        If the file cannot be opened or parsed, an error message is printed
        and the process exits with status 1.

        :param config: json file or a dictionary
        :return: config dictionary
        """
        # config is already a dictionary (or a dict subclass)
        if isinstance(config, dict):
            return config

        # opens config file; the context manager guarantees the handle is closed
        try:
            with open(op.expanduser(config), encoding="utf-8") as f:
                return json.load(f)
        except Exception as e:
            print("Error loading config file: ", e)
            sys.exit(1)

    @staticmethod
    def dump_tsv(file_name, data, header=None, append=False):
        """Dumps the data in tsv format.

        :param file_name: name of file; a leading '~' is expanded
        :param data: list of list; every cell is converted with ``str``
        :param header: list of headers, written before the data when given
        :param append: if True, appends the data to the existing file
        """
        mode = "a" if append else "w"
        # written as UTF-8 so the file can be read back by read_file_as_list
        with open(op.expanduser(file_name), mode, encoding="utf-8") as f:
            print("tsv file created:", file_name)
            if header:
                f.write("\t".join(str(h) for h in header) + "\n")
            for line in data:
                f.write("\t".join(str(d) for d in line) + "\n")
def main():
    """Demo: writes a small tsv file with a header, then appends two rows."""
    out_path = "output/test.tsv"
    columns = ["c1", "c2"]
    first_rows = [[1, 2], [3, 4]]
    FileUtils.dump_tsv(out_path, first_rows, columns)

    extra_rows = [[5, 6], [7, 8]]
    FileUtils.dump_tsv(out_path, extra_rows, append=True)
# Run the demo only when executed as a script, not on import.
if __name__ == "__main__":
    main()