How to use the agate.csv module in agate

To help you get started, we’ve selected a few agate.csv examples, drawn from popular ways the module is used in public projects.

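Before the excerpts, here is a minimal sketch of the module's two core entry points, agate.csv.reader and agate.csv.writer, which behave like their stdlib csv counterparts. The file names are placeholders, not taken from the examples below.

import agate

# A minimal round trip: read all rows, then write them back out.
# 'data.csv' and 'copy.csv' are hypothetical paths.
with open('data.csv', newline='') as f:
    rows = list(agate.csv.reader(f))

with open('copy.csv', 'w', newline='') as f:
    writer = agate.csv.writer(f)
    writer.writerows(rows)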

wireservice/csvkit: csvkit/utilities/in2csv.py (view on GitHub)
        if filetype == 'csv':
            kwargs.update(self.reader_kwargs)
            kwargs['sniff_limit'] = self.args.sniff_limit

        if filetype in ('xls', 'xlsx'):
            kwargs['header'] = not self.args.no_header_row

        if filetype not in ('dbf', 'geojson', 'json', 'ndjson'):  # csv, fixed, xls, xlsx
            kwargs['skip_lines'] = self.args.skip_lines

        if filetype != 'dbf':
            kwargs['column_types'] = self.get_column_types()

        # Convert the file.
        if filetype == 'csv' and self.args.no_inference and not self.args.no_header_row and not self.args.skip_lines and self.args.sniff_limit == 0:
            reader = agate.csv.reader(self.input_file, **self.reader_kwargs)
            writer = agate.csv.writer(self.output_file, **self.writer_kwargs)
            writer.writerows(reader)
        elif filetype == 'fixed':
            self.output_file.write(fixed2csv(self.input_file, schema, output=self.output_file, **kwargs))
        elif filetype == 'geojson':
            self.output_file.write(geojson2csv(self.input_file, **kwargs))
        elif filetype in ('csv', 'dbf', 'json', 'ndjson', 'xls', 'xlsx'):
            if filetype == 'csv':
                table = agate.Table.from_csv(self.input_file, **kwargs)
            elif filetype == 'json':
                table = agate.Table.from_json(self.input_file, key=self.args.key, **kwargs)
            elif filetype == 'ndjson':
                table = agate.Table.from_json(self.input_file, key=self.args.key, newline=True, **kwargs)
            elif filetype == 'xls':
                table = agate.Table.from_xls(self.input_file, sheet=self.args.sheet, encoding_override=self.args.encoding_xls, **kwargs)
            elif filetype == 'xlsx':
                table = agate.Table.from_xlsx(self.input_file, sheet=self.args.sheet, **kwargs)

wireservice/csvkit: csvkit/utilities/csvpy.py (view on GitHub)
        if self.input_file == sys.stdin:
            self.argparser.error('csvpy cannot accept input as piped data via STDIN.')

        # Attempt reading filename, will cause lazy loader to access file and raise error if it does not exist
        filename = self.input_file.name

        if self.args.as_dict:
            klass = agate.csv.DictReader
            class_name = 'agate.csv.DictReader'
            variable_name = 'reader'
        elif self.args.as_agate:
            klass = agate.Table.from_csv
            class_name = 'agate.Table'
            variable_name = 'table'
        else:
            klass = agate.csv.reader
            class_name = 'agate.csv.reader'
            variable_name = 'reader'

        variable = klass(self.input_file, **self.reader_kwargs)

        welcome_message = 'Welcome! "%s" has been loaded in an %s object named "%s".' % (filename, class_name, variable_name)

        try:
            from IPython.frontend.terminal.embed import InteractiveShellEmbed
            exec('%s = variable' % variable_name)
            ipy = InteractiveShellEmbed(banner1=welcome_message)
            ipy()
        except ImportError:
            import code
            code.interact(welcome_message, local={variable_name: variable})
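
The three branches above map to three reader shapes: agate.csv.DictReader (rows as dicts), agate.Table.from_csv (a typed agate Table), and agate.csv.reader (rows as lists). A small sketch of the first two, with an in-memory buffer standing in for the input file:

import io
import agate

buf = io.StringIO('name,city\nAlice,Oslo\nBob,Lima\n')

# The as_dict branch: each row is a dict keyed by the header.
for row in agate.csv.DictReader(buf):
    print(row['name'], row['city'])

# The as_agate branch: a typed agate.Table built from the same data.
buf.seek(0)
table = agate.Table.from_csv(buf)
print(len(table.rows))  # 2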

wireservice/csvkit: csvkit/utilities/csvclean.py (view on GitHub)
            base = splitext(self.input_file.name)[0]

            with open('%s_out.csv' % base, 'w') as f:
                clean_writer = agate.csv.writer(f, **self.writer_kwargs)

                checker = RowChecker(reader)
                clean_writer.writerow(checker.column_names)

                for row in checker.checked_rows():
                    clean_writer.writerow(row)

            if checker.errors:
                error_filename = '%s_err.csv' % base

                with open(error_filename, 'w') as f:
                    error_writer = agate.csv.writer(f, **self.writer_kwargs)

                    error_header = ['line_number', 'msg']
                    error_header.extend(checker.column_names)
                    error_writer.writerow(error_header)

                    error_count = len(checker.errors)

                    for e in checker.errors:
                        error_writer.writerow(self._format_error_row(e))

                self.output_file.write('%i error%s logged to %s\n' % (error_count, '' if error_count == 1 else 's', error_filename))
            else:
                self.output_file.write('No errors.\n')

            if checker.joins:
                self.output_file.write('%i rows were joined/reduced to %i rows after eliminating expected internal line breaks.\n' % (checker.rows_joined, checker.joins))
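
The error-log file above follows a simple pattern: bookkeeping columns are prepended to the original header, then one row is written per error. A minimal sketch of that pattern under made-up data (the file name, message, and rows are illustrative, and RowChecker is not involved):

import agate

column_names = ['name', 'city']
errors = [
    (3, 'Expected 2 columns, found 3', ['Alice', 'Oslo', 'extra']),
]

with open('example_err.csv', 'w', newline='') as f:
    error_writer = agate.csv.writer(f)
    # Prepend the bookkeeping columns to the original header.
    error_writer.writerow(['line_number', 'msg'] + column_names)
    for line_number, msg, row in errors:
        error_writer.writerow([line_number, msg] + row)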

wireservice/csvkit: csvkit/convert/fixed.py (view on GitHub)
    def __init__(self, schema):
        self.fields = []  # A list of FixedWidthFields

        schema_reader = agate.csv.reader(schema)
        schema_decoder = SchemaDecoder(next(schema_reader))

        for i, row in enumerate(schema_reader):
            try:
                self.fields.append(schema_decoder(row))
            except Exception as e:
                raise ValueError("Error reading schema at line %i: %s" % (i + 2, e))
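
Because the schema is itself a CSV (csvkit's fixed-width schemas have column, start, and length fields), agate.csv.reader can iterate it like any other file. A sketch with an illustrative in-memory schema:

import io
import agate

schema = io.StringIO('column,start,length\nname,0,10\ncity,10,8\n')

schema_reader = agate.csv.reader(schema)
header = next(schema_reader)  # ['column', 'start', 'length']
for i, row in enumerate(schema_reader):
    # i + 2 matches the 1-based line numbering in the error message above.
    print(i + 2, row)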

wireservice/csvkit: csvkit/table.py (view on GitHub)
        to be parsed, type inferred, etc. However, their order/index property will reflect the
        original data (e.g. column 8 will still be "order" 7, even if it's the third column
        in the resulting Table).
        """
        # This bit of nonsense is to deal with "files" from stdin,
        # which are not seekable and thus must be buffered
        contents = f.read()

        # sniff_limit == 0 means do not sniff
        if sniff_limit is None:
            kwargs['dialect'] = sniff_dialect(contents)
        elif sniff_limit > 0:
            kwargs['dialect'] = sniff_dialect(contents[:sniff_limit])

        f = six.StringIO(contents)
        rows = agate.csv.reader(f, **kwargs)

        try:
            if no_header_row:
                # Peek at a row to infer column names from, and put it back on top
                row = next(rows)
                rows = itertools.chain([row], rows)
                headers = make_default_headers(len(row))
            else:
                headers = next(rows)
        except StopIteration:
            # The file is `/dev/null`.
            headers = []

        if no_header_row or column_ids:
            column_ids = parse_column_identifiers(column_ids, headers, column_offset)
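
The buffer-then-sniff idea above also exists in agate's public Table API: Table.from_csv accepts a sniff_limit argument, where None sniffs the whole input and 0 (the default) disables sniffing. A sketch with a made-up semicolon-delimited sample; note that sniffing heuristics can misfire on very small inputs:

import io
import agate

contents = 'a;b\n1;2\n3;4\n'  # stands in for f.read() on a non-seekable stream
table = agate.Table.from_csv(io.StringIO(contents), sniff_limit=None)
table.print_table()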