How to use the clevercsv.detect.Detector class in clevercsv

To help you get started, we’ve selected a few clevercsv examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github alan-turing-institute / CleverCSV / tests / test_unit / test_detect.py View on Github external
def test_detect(self):
        # Adapted from CPython
        # Runs dialect detection on two fixture samples and compares the
        # delimiter, quote character and escape character of each result.
        sniffer = Detector()

        def check(found, delimiter, quotechar, escapechar):
            # Attributes are compared in the same fixed order for every sample.
            self.assertEqual(found.delimiter, delimiter)
            self.assertEqual(found.quotechar, quotechar)
            self.assertEqual(found.escapechar, escapechar)

        check(sniffer.detect(self.sample1), ",", "", "")
        check(sniffer.detect(self.sample2), ":", "'", "")
github alan-turing-institute / CleverCSV / tests / test_unit / test_detect.py View on Github external
def test_has_header_regex_special_delimiter(self):
        # sample8 on its own must be reported as header-less; the same data
        # prefixed with header2 must be reported as having a header.
        sniffer = Detector()

        headerless = sniffer.has_header(self.sample8)
        self.assertEqual(headerless, False)

        with_header = sniffer.has_header(self.header2 + self.sample8)
        self.assertEqual(with_header, True)
github alan-turing-institute / CleverCSV / tests / test_unit / test_detect.py View on Github external
def test_delimiters(self):
        # Adapted from CPython
        # Checks the delimiter reported by Detector.detect for the fixture
        # samples sample3 through sample7. One Detector instance is reused
        # for every call.
        detector = Detector()
        # With no delimiter hint, the detected delimiter only has to be
        # something that occurs in sample3.
        dialect = detector.detect(self.sample3)
        self.assertIn(dialect.delimiter, self.sample3)
        # Passing ``delimiters`` is expected to change the outcome for the
        # same sample (presumably it restricts the candidates -- confirm
        # against Detector.detect).
        dialect = detector.detect(self.sample3, delimiters="?,")
        self.assertEqual(dialect.delimiter, "?")
        dialect = detector.detect(self.sample3, delimiters="/,")
        self.assertEqual(dialect.delimiter, "/")
        dialect = detector.detect(self.sample4)
        self.assertEqual(dialect.delimiter, ";")
        dialect = detector.detect(self.sample5)
        self.assertEqual(dialect.delimiter, "\t")
        dialect = detector.detect(self.sample6)
        self.assertEqual(dialect.delimiter, "|")
        # sample7 is the only sample here whose quote character is checked too.
        dialect = detector.detect(self.sample7)
        self.assertEqual(dialect.delimiter, "|")
        self.assertEqual(dialect.quotechar, "'")
        # NOTE(review): the result for sample8 is not asserted in this
        # excerpt; the listing appears to be cut off here -- confirm against
        # the full test file.
        dialect = detector.detect(self.sample8)
github alan-turing-institute / CleverCSV / tests / test_unit / test_detect.py View on Github external
def test_has_header(self):
        # sample1 on its own must be reported as header-less; the same data
        # prefixed with header1 must be reported as having a header.
        sniffer = Detector()

        headerless = sniffer.has_header(self.sample1)
        self.assertEqual(headerless, False)

        with_header = sniffer.has_header(self.header1 + self.sample1)
        self.assertEqual(with_header, True)
github alan-turing-institute / CleverCSV / clevercsv / wrappers.py View on Github external
-------
    rows: generator
        Returns file as a generator over rows as dictionaries.

    Raises
    ------
    NoDetectionResult
        When the dialect detection fails.

    """
    if encoding is None:
        encoding = get_encoding(filename)
    with open(filename, "r", newline="", encoding=encoding) as fid:
        if dialect is None:
            data = fid.read(num_chars) if num_chars else fid.read()
            dialect = Detector().detect(data, verbose=verbose)
            fid.seek(0)
        r = DictReader(fid, dialect=dialect)
        for row in r:
            yield row
github alan-turing-institute / CleverCSV / clevercsv / wrappers.py View on Github external
Additional keyword arguments for the ``pandas.read_csv`` function. You 
        can specify the file encoding here if needed, and it will be used 
        during dialect detection.

    """
    if not (os.path.exists(filename) and os.path.isfile(filename)):
        raise ValueError("Filename must be a regular file")
    pd = import_optional_dependency("pandas")

    # Use provided encoding or detect it, and record it for pandas
    enc = kwargs.get("encoding") or get_encoding(filename)
    kwargs["encoding"] = enc

    with open(filename, "r", newline="", encoding=enc) as fid:
        data = fid.read(num_chars) if num_chars else fid.read()
        dialect = Detector().detect(data)
    csv_dialect = dialect.to_csv_dialect()

    # This is used to catch pandas' warnings when a dialect is supplied.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            message="^Conflicting values for .*",
            category=pd.errors.ParserWarning,
        )
        df = pd.read_csv(filename, *args, dialect=csv_dialect, **kwargs)
    return df
github alan-turing-institute / CleverCSV / clevercsv / wrappers.py View on Github external
method : str
        Dialect detection method to use. Either 'normal' for normal form 
        detection, 'consistency' for the consistency measure, or 'auto' for 
        first normal and then consistency.

    Returns
    -------
    dialect : SimpleDialect
        The detected dialect as a :class:`SimpleDialect`, or None if detection 
        failed.

    """
    enc = encoding or get_encoding(filename)
    with open(filename, "r", newline="", encoding=enc) as fp:
        data = fp.read(num_chars) if num_chars else fp.read()
        dialect = Detector().detect(data, verbose=verbose, method=method)
    return dialect
github alan-turing-institute / CleverCSV / clevercsv / wrappers.py View on Github external
-------
    rows: generator
        Returns file as a generator over rows.

    Raises
    ------
    NoDetectionResult
        When the dialect detection fails.

    """
    if encoding is None:
        encoding = get_encoding(filename)
    with open(filename, "r", newline="", encoding=encoding) as fid:
        if dialect is None:
            data = fid.read(num_chars) if num_chars else fid.read()
            dialect = Detector().detect(data, verbose=verbose)
            if dialect is None:
                raise NoDetectionResult()
            fid.seek(0)
        r = reader(fid, dialect)
        yield from r