How to use the lineflow.core.ConcatDataset function in lineflow

To help you get started, we’ve selected a few lineflow examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tofunlp / lineflow / tests / test_core.py View on Github external
def setUp(self):
        """Prepare the fixture: a base range and a ConcatDataset of repeated copies of it."""
        self.base = range(100)
        self.n = 5
        # NOTE(review): the literal 5 equals self.n; presumably both denote the
        # number of concatenated copies -- confirm before tying them together.
        repeated = [self.base] * 5
        self.data = ConcatDataset(*repeated)
github tofunlp / lineflow / tests / test_text.py View on Github external
def test_concats_multiple_files(self):
        """Check a TextDataset built from the same path twice in 'concat' mode.

        The dataset is expected to yield the fixture lines twice over, both
        when iterated and when indexed, and to be backed by a ConcatDataset.
        """
        path = self.fp.name
        single = self.lines
        doubled = single + single
        expected_length = len(single) * 2

        data = TextDataset([path, path], mode='concat')

        # Sequential access through iteration.
        for actual, wanted in zip(data, doubled):
            self.assertEqual(actual, wanted)
        # Random access through indexing.
        for index, wanted in enumerate(doubled):
            self.assertEqual(data[index], wanted)

        self.assertEqual(len(data), expected_length)
        self.assertEqual(data._length, expected_length)

        last_index = len(data) - 1
        self.assertEqual(data[last_index], single[-1])

        self.assertIsInstance(data._dataset, lineflow.core.ConcatDataset)
        mapped = data.map(lambda x: x)
        self.assertIsInstance(mapped._dataset, TextDataset)
github tofunlp / lineflow / tests / test_core.py View on Github external
def test_dunder_add(self):
        """Check that chaining ``+`` on the fixture dataset three times concatenates it."""
        combined = self.data + self.data + self.data
        base_items = list(self.base)
        wanted = base_items * 3
        self.assertSequenceEqual(combined, wanted)
        self.assertIsInstance(combined, ConcatDataset)
github tofunlp / lineflow / lineflow / text.py View on Github external
def __init__(self,
                 paths: Union[str, List[str]],
                 encoding: str = 'utf-8',
                 mode: str = 'zip') -> None:
        """Build the underlying dataset from one text file or a list of them.

        Args:
            paths: A single file path, or a list of file paths.
            encoding: Encoding handed to ``easyfile.TextFile`` for every path.
            mode: How a list of paths is combined: ``'zip'`` wraps the files
                in a ``ZipDataset``, ``'concat'`` in a ``ConcatDataset``.
                Not consulted when ``paths`` is a single string.

        Raises:
            ValueError: If ``paths`` is a list and ``mode`` is neither
                ``'zip'`` nor ``'concat'``.
            TypeError: If ``paths`` is neither a ``str`` nor a ``list``.
        """
        if isinstance(paths, str):
            dataset = easyfile.TextFile(paths, encoding)
        elif isinstance(paths, list):
            # Validate the mode before opening any file, so that an invalid
            # mode is reported without touching the filesystem.
            if mode == 'zip':
                combine = ZipDataset
            elif mode == 'concat':
                combine = ConcatDataset
            else:
                raise ValueError(f"only 'zip' and 'concat' are valid for 'mode', but '{mode}' is given.")
            dataset = combine(*[easyfile.TextFile(p, encoding) for p in paths])
        else:
            # Without this branch ``dataset`` would be unbound below and the
            # caller would get an unrelated error instead of a clear one.
            raise TypeError(
                f"'paths' must be a str or a list of str, but {type(paths).__name__} is given.")

        super().__init__(dataset)
github tofunlp / lineflow / lineflow / core.py View on Github external
def lineflow_concat(*datasets: DatasetMixin) -> ConcatDataset:
    """Wrap the given datasets in a single ``ConcatDataset``.

    Args:
        *datasets: The datasets to combine, forwarded positionally and in
            the given order to ``ConcatDataset``. Each positional argument
            is one dataset (the annotation names the element type).

    Returns:
        The ``ConcatDataset`` constructed from ``datasets``.
    """
    return ConcatDataset(*datasets)
github tofunlp / lineflow / lineflow / core.py View on Github external
def __add__(self, other: 'Dataset') -> 'ConcatDataset':
        """Support ``self + other`` by returning ``ConcatDataset(self, other)``."""
        combined = ConcatDataset(self, other)
        return combined