How to use the clevercsv.utils.get_encoding function in clevercsv

To help you get started, we’ve selected a few clevercsv examples, based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github alan-turing-institute / CleverCSV / clevercsv / wrappers.py View on Github external
verbose: bool
        Whether or not to show detection progress.

    Returns
    -------
    rows: generator
        Returns file as a generator over rows as dictionaries.

    Raises
    ------
    NoDetectionResult
        When the dialect detection fails.

    """
    if encoding is None:
        encoding = get_encoding(filename)
    with open(filename, "r", newline="", encoding=encoding) as fid:
        if dialect is None:
            data = fid.read(num_chars) if num_chars else fid.read()
            dialect = Detector().detect(data, verbose=verbose)
            fid.seek(0)
        r = DictReader(fid, dialect=dialect)
        for row in r:
            yield row
github alan-turing-institute / CleverCSV / comparison / detector_clevercsv.py View on Github external
dialect = det.detect(sample)
    except clevercsv.Error:
        raise DetectionError

    if dialect is None:
        return None
    return dict(
        delimiter=dialect.delimiter,
        quotechar=dialect.quotechar,
        escapechar=dialect.escapechar,
    )

if __name__ == "__main__":
    # Script entry point: the first command-line argument is the file to
    # run the detector on.
    if len(sys.argv) <= 1:
        # No file was given, so report the expected invocation on stderr.
        print(f"Usage: {sys.argv[0]} filename", file=sys.stderr)
    else:
        filename = sys.argv[1]
        # Look up the encoding via clevercsv first so that the detector is
        # called with both the file name and its encoding.
        encoding = clevercsv.utils.get_encoding(filename)
        print(detector(filename, encoding))
github alan-turing-institute / CleverCSV / clevercsv / wrappers.py View on Github external
Note that using less than the entire file will speed up detection, but 
        can reduce the accuracy of the detected dialect.

    **kwargs:
        Additional keyword arguments for the ``pandas.read_csv`` function. You 
        can specify the file encoding here if needed, and it will be used 
        during dialect detection.

    """
    if not (os.path.exists(filename) and os.path.isfile(filename)):
        raise ValueError("Filename must be a regular file")
    pd = import_optional_dependency("pandas")

    # Use provided encoding or detect it, and record it for pandas
    enc = kwargs.get("encoding") or get_encoding(filename)
    kwargs["encoding"] = enc

    with open(filename, "r", newline="", encoding=enc) as fid:
        data = fid.read(num_chars) if num_chars else fid.read()
        dialect = Detector().detect(data)
    csv_dialect = dialect.to_csv_dialect()

    # This is used to catch pandas' warnings when a dialect is supplied.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            message="^Conflicting values for .*",
            category=pd.errors.ParserWarning,
        )
        df = pd.read_csv(filename, *args, dialect=csv_dialect, **kwargs)
    return df
github alan-turing-institute / CleverCSV / clevercsv / wrappers.py View on Github external
verbose : bool
        Enable verbose mode during detection.

    method : str
        Dialect detection method to use. Either 'normal' for normal form 
        detection, 'consistency' for the consistency measure, or 'auto' for 
        first normal and then consistency.

    Returns
    -------
    dialect : SimpleDialect
        The detected dialect as a :class:`SimpleDialect`, or None if detection 
        failed.

    """
    enc = encoding or get_encoding(filename)
    with open(filename, "r", newline="", encoding=enc) as fp:
        data = fp.read(num_chars) if num_chars else fp.read()
        dialect = Detector().detect(data, verbose=verbose, method=method)
    return dialect
github alan-turing-institute / CleverCSV / clevercsv / wrappers.py View on Github external
verbose: bool
        Whether or not to show detection progress.

    Returns
    -------
    rows: generator
        Returns file as a generator over rows.

    Raises
    ------
    NoDetectionResult
        When the dialect detection fails.

    """
    if encoding is None:
        encoding = get_encoding(filename)
    with open(filename, "r", newline="", encoding=encoding) as fid:
        if dialect is None:
            data = fid.read(num_chars) if num_chars else fid.read()
            dialect = Detector().detect(data, verbose=verbose)
            if dialect is None:
                raise NoDetectionResult()
            fid.seek(0)
        r = reader(fid, dialect)
        yield from r