How to use the lineflow.download.cache_or_load_file function in lineflow

To help you get started, we've selected a few lineflow examples based on popular ways cache_or_load_file is used in public projects.
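
Before the examples, it helps to see the contract the function expects. The sketch below is illustrative (the creator/loader bodies, the placeholder data, and the cache path are not part of lineflow): cache_or_load_file(path, creator, loader) calls loader(path) when path already exists, and otherwise calls creator(path), which should build the data, persist it at path, and return it.

import os
import pickle
import tempfile

from lineflow import download


def creator(path):
    # build the data from scratch, write it to `path`, and return it
    dataset = {"train": ["example 1", "example 2"]}  # placeholder data
    with open(path, "wb") as f:
        pickle.dump(dataset, f)
    return dataset


def loader(path):
    # read back a previously cached copy
    with open(path, "rb") as f:
        return pickle.load(f)


# the first call runs creator and caches the pickle;
# later calls find the file and run loader instead
pkl_path = os.path.join(tempfile.gettempdir(), "example.pkl")
dataset = download.cache_or_load_file(pkl_path, creator, loader)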


github tofunlp/lineflow/tests/test_download.py
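
This unit test exercises the cache-hit path: tempfile.mkstemp creates the file up front, so cache_or_load_file skips the creator mock entirely and hands the existing path straight to the loader.
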
# Method from a unittest.TestCase subclass; assumes `import os, tempfile`,
# `from unittest import mock`, and `from lineflow import download`.
def test_cache_exists(self):
    creator = mock.Mock()
    loader = mock.Mock()

    # mkstemp creates the file on disk, so the cache already "exists"
    file_desc, file_name = tempfile.mkstemp()

    try:
        download.cache_or_load_file(file_name, creator, loader)
    finally:
        os.close(file_desc)
        os.remove(file_name)

    # with an existing cache file, creator is never invoked and loader
    # is called exactly once with the cached path
    self.assertFalse(creator.called)
    loader.assert_called_once_with(file_name)

github tofunlp/lineflow/lineflow/datasets/commonsenseqa.py
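
The CommonsenseQA dataset shows the canonical pattern: a nested creator builds a dict of splits and pickles it, a nested loader unpickles it, and cache_or_load_file decides which of the two actually runs.
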
"answer_key": answer_key,
                    "options": options,
                    "stem": stem
                })
            dataset[split] = temp

        with io.open(path, "wb") as f:
            pickle.dump(dataset, f)
        return dataset

    def loader(path):
        with io.open(path, "rb") as f:
            return pickle.load(f)

    pkl_path = os.path.join(root, "commonsenseqa.pkl")
    return download.cache_or_load_file(pkl_path, creator, loader)

github tofunlp/lineflow/lineflow/datasets/cnn_dailymail.py
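
CNN/DailyMail follows the same pattern, except each cached split holds a pair of easyfile.TextFile handles for the tagged source and target files rather than in-memory lists.
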
    def creator(path):
        # archive download/extraction and the loop over splits are omitted
        # in this excerpt; src_path and tgt_path name the tagged files:
            tgt_path = f'{split if split != "dev" else "val"}.txt.tgt.tagged'
            dataset[split] = (
                easyfile.TextFile(os.path.join(target_path, src_path)),
                easyfile.TextFile(os.path.join(target_path, tgt_path))
            )

        with io.open(path, 'wb') as f:
            pickle.dump(dataset, f)
        return dataset

    def loader(path):
        with io.open(path, 'rb') as f:
            return pickle.load(f)

    pkl_path = os.path.join(root, 'cnndm.pkl')
    return download.cache_or_load_file(pkl_path, creator, loader)

github tofunlp/lineflow/lineflow/datasets/msr_paraphrase.py
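
For MSR Paraphrase, the creator downloads each TSV split with download.cached_download, parses it with csv.DictReader, and pickles the resulting lists of row dicts.
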
    def creator(path):
        # setup (url, fieldnames, the loop over splits) is omitted in
        # this excerpt; each split's TSV is fetched and parsed:
            data_path = download.cached_download(url.format(split))
            with io.open(data_path, 'r', encoding='utf-8') as f:
                f.readline()  # skip header
                reader = csv.DictReader(f, delimiter='\t', fieldnames=fieldnames)
                dataset[split] = [dict(row) for row in reader]

        with io.open(path, 'wb') as f:
            pickle.dump(dataset, f)
        return dataset

    def loader(path):
        with io.open(path, 'rb') as f:
            return pickle.load(f)

    pkl_path = os.path.join(root, 'msr_paraphrase.pkl')
    return download.cache_or_load_file(pkl_path, creator, loader)

github tofunlp/lineflow/lineflow/datasets/imdb.py
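
The IMDB creator caches lists of file paths (the positive and negative reviews of each split) instead of the review text itself.
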
    def creator(path):
        # archive extraction and the matching pos_path definition are
        # omitted in this excerpt:
            neg_path = os.path.join(extracted_path, split, 'neg')
            dataset[split] = [x.path for x in os.scandir(pos_path)
                              if x.is_file() and x.name.endswith('.txt')] + \
                             [x.path for x in os.scandir(neg_path)
                              if x.is_file() and x.name.endswith('.txt')]

        with io.open(path, 'wb') as f:
            pickle.dump(dataset, f)
        return dataset

    def loader(path):
        with io.open(path, 'rb') as f:
            return pickle.load(f)

    pkl_path = os.path.join(root, 'aclImdb', 'imdb.pkl')
    return download.cache_or_load_file(pkl_path, creator, loader)

github tofunlp/lineflow/lineflow/datasets/penn_treebank.py
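
The Penn Treebank creator reads each downloaded split into a list of lines, mapping the 'dev' split name onto the upstream 'valid' file.
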
    def creator(path):
        # `url` and `root` come from the enclosing dataset function
        dataset = {}
        for split in ('train', 'dev', 'test'):
            data_path = download.cached_download(url.format(split if split != 'dev' else 'valid'))
            with io.open(data_path, 'rt') as f:
                dataset[split] = [line.rstrip(os.linesep) for line in f]

        with io.open(path, 'wb') as f:
            pickle.dump(dataset, f)
        return dataset

    def loader(path):
        with io.open(path, 'rb') as f:
            return pickle.load(f)

    pkl_path = os.path.join(root, 'ptb.pkl')
    return download.cache_or_load_file(pkl_path, creator, loader)

github tofunlp/lineflow/lineflow/datasets/text_classification.py
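
text_classification generalizes the pattern: the creator is picked per dataset key, while the loader and the final cache_or_load_file call stay the same.
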
        # the creator bodies (list_creator / easyfile_creator) are omitted
        # in this excerpt; the visible tail pickles the assembled dataset:
        with io.open(path, 'wb') as f:
            pickle.dump(dataset, f)
        return dataset

    def loader(path):
        with io.open(path, 'rb') as f:
            return pickle.load(f)

    assert key in urls

    if key in ('ag_news', 'dbpedia'):
        creator = list_creator
    else:
        creator = easyfile_creator

    pkl_path = os.path.join(root, f'{key}.pkl')
    return download.cache_or_load_file(pkl_path, creator, loader)