How to use the lineflow.download module in lineflow

To help you get started, we've selected a few lineflow.download examples based on popular ways it is used in public projects.

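Every example below leans on a small set of helpers from the lineflow.download module. As a quick orientation, here is a minimal sketch of the two most common ones; the URL is a placeholder, not a real dataset:

from lineflow import download

# Directory where lineflow keeps cached files.
root = download.get_cache_root()

# Download a URL once; repeated calls return the already-cached local path.
local_path = download.cached_download('https://example.com/corpus.zip')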

From tofunlp/lineflow: tests/datasets/test_small_parallel_enja.py
import tempfile

from lineflow import download

def setUp(self):
    # Redirect lineflow's download cache to a temp directory for the test.
    self.default_cache_root = download.get_cache_root()
    self.temp_dir = tempfile.mkdtemp()
    download.set_cache_root(self.temp_dir)
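The matching tearDown is not shown in the snippet, but it would presumably restore the saved cache root and delete the temporary directory, along these lines:

import shutil

def tearDown(self):
    # Undo setUp: restore the default cache root and drop the temp directory.
    download.set_cache_root(self.default_cache_root)
    shutil.rmtree(self.temp_dir)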
From tofunlp/lineflow: lineflow/datasets/wmt14.py
            # Pair the source and target (.de) files for this split.
            tgt_path = f'{filename}.de'
            dataset[split] = (
                easyfile.TextFile(os.path.join(target_path, src_path)),
                easyfile.TextFile(os.path.join(target_path, tgt_path))
            )

        # Persist the split dict so later calls can load it via loader().
        with io.open(path, 'wb') as f:
            pickle.dump(dataset, f)
        return dataset

    def loader(path):
        with io.open(path, 'rb') as f:
            return pickle.load(f)

    pkl_path = os.path.join(root, 'cnndm.pkl')
    return download.cache_or_load_file(pkl_path, creator, loader)
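The creator/loader pair above is the standard download.cache_or_load_file idiom: on the first call the creator builds the dataset and writes it to the given path, and on later calls the loader simply reads the cached file back. A minimal self-contained sketch of the same pattern, where the pickle name and dataset contents are placeholders:

import os
import pickle

from lineflow import download

def creator(path):
    # First run: build the dataset, persist it to `path`, and return it.
    dataset = {'train': ['an example line']}  # placeholder contents
    with open(path, 'wb') as f:
        pickle.dump(dataset, f)
    return dataset

def loader(path):
    # Later runs: reload the cached pickle instead of rebuilding.
    with open(path, 'rb') as f:
        return pickle.load(f)

pkl_path = os.path.join(download.get_cache_root(), 'demo.pkl')  # placeholder name
dataset = download.cache_or_load_file(pkl_path, creator, loader)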
From tofunlp/lineflow: lineflow/datasets/squad.py
                    # Flatten SQuAD's nested JSON: tag each QA pair with its
                    # article title and paragraph context.
                    for qa in paragraph['qas']:
                        qa['title'] = title
                        qa['context'] = context
                        temp.append(qa)
            dataset[split] = temp

        # Persist the flattened splits for reuse on later calls.
        with io.open(path, 'wb') as f:
            pickle.dump(dataset, f)
        return dataset

    def loader(path):
        with io.open(path, 'rb') as f:
            return pickle.load(f)

    pkl_path = os.path.join(root, f'squad.{version_str}.pkl')
    return download.cache_or_load_file(pkl_path, creator, loader)
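Baking the version string into the pickle name keeps a separate cache file for each SQuAD release, so switching between versions never loads stale data.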
From tofunlp/lineflow: lineflow/datasets/wikitext.py
    def easyfile_creator(path):
        # Download the archive once; cached_download returns the local
        # path and reuses it on subsequent calls.
        archive_path = download.cached_download(url)
        with zipfile.ZipFile(archive_path, 'r') as archive:
            print(f'Extracting to {root}...')
            archive.extractall(root)

        # WikiText ships its validation split as 'valid', exposed here as 'dev'.
        dataset = {}
        for split in ('train', 'dev', 'test'):
            filename = 'wiki.{}.tokens'.format(split if split != 'dev' else 'valid')
            dataset[split] = easyfile.TextFile(os.path.join(root, name, filename))

        with io.open(path, 'wb') as f:
            pickle.dump(dataset, f)
        return dataset
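The fragment stops after the creator returns; in the full wikitext module, a matching pickle loader and a download.cache_or_load_file call presumably follow, mirroring the wmt14 and squad examples above.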