How to use the clevercsv.dialect.SimpleDialect class in clevercsv

To help you get started, we’ve selected a few clevercsv examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github alan-turing-institute / CleverCSV / tests / test_unit / test_wrappers.py View on Github external
def test_stream_csv(self):
        table = [["A", "B", "C"], [1, 2, 3], [4, 5, 6]]
        dialect = SimpleDialect(delimiter=";", quotechar="", escapechar="")
        with self.subTest(name="simple"):
            self._stream_test(table, dialect)

        table = [["A,0", "B", "C"], [1, 2, 3], [4, 5, 6]]
        dialect = SimpleDialect(delimiter=",", quotechar="", escapechar="\\")
        with self.subTest(name="escaped"):
            self._stream_test(table, dialect)

        table = [["A,0", "B", "C"], [1, 2, 3], [4, 5, 6]]
        dialect = SimpleDialect(delimiter=",", quotechar='"', escapechar="")
        with self.subTest(name="quoted"):
            self._stream_test(table, dialect)

        table = [['a"A,0"b', "B", "C"], [1, 2, 3], [4, 5, 6]]
        dialect = SimpleDialect(delimiter=",", quotechar='"', escapechar="")
        with self.subTest(name="double"):
github alan-turing-institute / CleverCSV / tests / test_unit / test_console.py View on Github external
def test_code_4(self):
        table = [["Å", "B,D", "C"], [1, 2, 3], [4, 5, 6]]
        dialect = SimpleDialect(delimiter=",", quotechar="", escapechar="\\")
        encoding = "ISO-8859-1"
        tmpfname = self._build_file(table, dialect, encoding=encoding)

        application = build_application()
        command = application.find("code")
        tester = CommandTester(command)
        tester.execute(tmpfname)

        exp = f"""\

# Code generated with CleverCSV version {__version__}

import clevercsv

with open("{tmpfname}", "r", newline="", encoding="{encoding}") as fp:
    reader = clevercsv.reader(fp, delimiter=",", quotechar="", escapechar="\\\\")
github alan-turing-institute / CleverCSV / tests / test_unit / test_detect_pattern.py View on Github external
def test_abstraction_10(self):
        # The input contains the escape character "|" directly before a
        # quote character; the abstraction is expected to be "CDC".
        dialect = SimpleDialect(delimiter=",", quotechar='"', escapechar="|")
        abstraction = detect_pattern.make_abstraction('A,"B,C|"D"', dialect)
        self.assertEqual("CDC", abstraction)
github alan-turing-institute / CleverCSV / tests / test_unit / test_detect_pattern.py View on Github external
def test_abstraction_1(self):
        # Three comma-separated cells, dialect with empty quotechar and
        # escapechar; the abstraction is expected to be "CDCDC".
        dialect = SimpleDialect(delimiter=",", quotechar="", escapechar="")
        abstraction = detect_pattern.make_abstraction("A,B,C", dialect)
        self.assertEqual("CDCDC", abstraction)
github alan-turing-institute / CleverCSV / tests / test_unit / test_detect_type.py View on Github external
def test_type_score_3(self):
        # theta_3 from paper
        rows = [
            ["7,5", " Mon, Jan 12", "6,40"],
            ["100", " Fri, Mar 21", "8,23"],
            ["8,2", " Thu, Sep 17", "2,71"],
            ["538,0", "", "7,26"],
            ["N/A", " Wed, Oct 4", "6,93"],
        ]
        # Cells are joined with ";" and rows with a bare carriage return.
        text = "\r".join(";".join(row) for row in rows)
        theta = SimpleDialect(delimiter=";", quotechar='"', escapechar="")
        # Expected score for this table under theta is 11/15.
        self.assertAlmostEqual(11 / 15, type_score(text, theta))
github alan-turing-institute / CleverCSV / tests / test_unit / test_normal_forms.py View on Github external
def test_form_5(self):
        dialect = SimpleDialect(delimiter=",", quotechar='"', escapechar="")

        # Inputs that is_form_5 is expected to accept.
        accepted = (
            '"A,B"\n"1,2"\n"3,4"',
            '"A,B"\n"1,"\n"2,3"',
        )
        # Inputs that is_form_5 is expected to reject.
        rejected = (
            "A,B\n1,2\n3,4",
            "A,B\n1,\n2,3",
            '"A,""B"""\n"1,"\n"2,3"',
        )

        for text in accepted:
            self.assertTrue(is_form_5(text, dialect))
        for text in rejected:
            self.assertFalse(is_form_5(text, dialect))
github alan-turing-institute / CleverCSV / tests / test_unit / test_detect_pattern.py View on Github external
def test_pattern_score_2(self):
        # theta_2 from paper
        lines = [
            "7,5; Mon, Jan 12;6,40",
            "100; Fri, Mar 21;8,23",
            "8,2; Thu, Sep 17;2,71",
            "538,0;;7,26",
            '"NA"; Wed, Oct 4;6,93',
        ]
        text = "\n".join(lines)
        theta = SimpleDialect(delimiter=";", quotechar="", escapechar="")
        # Expected pattern score for this data under theta is 10/3.
        self.assertAlmostEqual(10 / 3, detect_pattern.pattern_score(text, theta))
github alan-turing-institute / CleverCSV / tests / test_unit / test_console.py View on Github external
def test_detect_base(self):
        table = [["A", "B", "C"], [1, 2, 3], [4, 5, 6]]
        dialect = SimpleDialect(delimiter=";", quotechar="", escapechar="")
        with self.subTest(name="simple"):
            self._detect_test_wrap(table, dialect)

        table = [["A,0", "B", "C"], [1, 2, 3], [4, 5, 6]]
        dialect = SimpleDialect(delimiter=",", quotechar="", escapechar="\\")
        with self.subTest(name="escaped"):
            self._detect_test_wrap(table, dialect)

        table = [["A,0", "B", "C"], [1, 2, 3], [4, 5, 6]]
        dialect = SimpleDialect(delimiter=",", quotechar='"', escapechar="")
        with self.subTest(name="quoted"):
            self._detect_test_wrap(table, dialect)

        table = [['a"A,0"b', "B", "C"], [1, 2, 3], [4, 5, 6]]
        dialect = SimpleDialect(delimiter=",", quotechar='"', escapechar="")
        with self.subTest(name="double"):
github alan-turing-institute / CleverCSV / clevercsv / potential_dialects.py View on Github external
# escapechars are those that precede a delimiter or quotechar
    for u, v in pairwise(data):
        if not is_potential_escapechar(u, encoding):
            continue
        for delim, quotechar in itertools.product(delims, quotechars):
            if v == delim or v == quotechar:
                escapechars[(delim, quotechar)].add(u)

    # remove dialects where the delimiter is always masked by quotes.
    dialects = []
    for delim in delims:
        for quotechar in quotechars:
            for escapechar in escapechars[(delim, quotechar)]:
                if masked_by_quotechar(data, quotechar, escapechar, delim):
                    continue
                d = SimpleDialect(delim, quotechar, escapechar)
                dialects.append(d)
    return dialects
github alan-turing-institute / CleverCSV / clevercsv / read.py View on Github external
def _make_simple_dialect(self, dialect, **fmtparams):
        """Turn *dialect* into a validated ``SimpleDialect``.

        Parameters
        ----------
        dialect : str, csv.Dialect or SimpleDialect
            A name looked up with ``csv.get_dialect``, a ``csv.Dialect``
            instance, or a ``SimpleDialect``.
        **fmtparams
            Formatting overrides. Only ``delimiter``, ``quotechar``,
            ``escapechar`` and ``strict`` are applied; other keys are
            ignored by this method.

        Returns
        -------
        SimpleDialect
            The dialect with the overrides applied, after ``validate()`` has
            been called on it.

        Raises
        ------
        ValueError
            If *dialect* is none of the supported types.
        """
        import copy

        overrides = {
            key: value
            for key, value in fmtparams.items()
            if key in ("delimiter", "quotechar", "escapechar", "strict")
        }

        if isinstance(dialect, str):
            sd = SimpleDialect.from_csv_dialect(csv.get_dialect(dialect))
        elif isinstance(dialect, csv.Dialect):
            sd = SimpleDialect.from_csv_dialect(dialect)
        elif isinstance(dialect, SimpleDialect):
            # Apply overrides to a shallow copy so the caller's dialect object
            # is not silently modified; without overrides the object is
            # returned as-is, as before.
            sd = copy.copy(dialect) if overrides else dialect
        else:
            # Wrap in a 1-tuple: a bare tuple on the right of % would be
            # unpacked as multiple format arguments and raise TypeError.
            raise ValueError("Unknown dialect type: %r" % (dialect,))

        for key, value in overrides.items():
            setattr(sd, key, value)
        sd.validate()
        return sd