# in2csv: assemble reader kwargs for the detected filetype, then convert the
# input to CSV. (The opening `if filetype == 'csv':` guard is restored here;
# the duplicate of this snippet further down confirms it.)
if filetype == 'csv':
    kwargs.update(self.reader_kwargs)
    kwargs['sniff_limit'] = self.args.sniff_limit

if filetype in ('xls', 'xlsx'):
    kwargs['header'] = not self.args.no_header_row

if filetype not in ('dbf', 'geojson', 'json', 'ndjson'):  # csv, fixed, xls, xlsx
    kwargs['skip_lines'] = self.args.skip_lines

if filetype != 'dbf':
    kwargs['column_types'] = self.get_column_types()

# Convert the file.
if filetype == 'csv' and self.args.no_inference and not self.args.no_header_row \
        and not self.args.skip_lines and self.args.sniff_limit == 0:
    # Fast path: no inference or sniffing needed, so stream the CSV through unchanged.
    reader = agate.csv.reader(self.input_file, **self.reader_kwargs)
    writer = agate.csv.writer(self.output_file, **self.writer_kwargs)
    writer.writerows(reader)
elif filetype == 'fixed':
    self.output_file.write(fixed2csv(self.input_file, schema, output=self.output_file, **kwargs))
elif filetype == 'geojson':
    self.output_file.write(geojson2csv(self.input_file, **kwargs))
elif filetype in ('csv', 'dbf', 'json', 'ndjson', 'xls', 'xlsx'):
    if filetype == 'csv':
        table = agate.Table.from_csv(self.input_file, **kwargs)
    elif filetype == 'json':
        table = agate.Table.from_json(self.input_file, key=self.args.key, **kwargs)
    elif filetype == 'ndjson':
        table = agate.Table.from_json(self.input_file, key=self.args.key, newline=True, **kwargs)
    elif filetype == 'xls':
        table = agate.Table.from_xls(self.input_file, sheet=self.args.sheet,
                                     encoding_override=self.args.encoding_xls, **kwargs)
    elif filetype == 'xlsx':
        table = agate.Table.from_xlsx(self.input_file, sheet=self.args.sheet, **kwargs)
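
# Example (sketch): a minimal, hedged illustration of the same load-then-convert
# pattern using only the agate library; 'input.csv' and 'output.csv' are
# hypothetical file names, not part of the code above.
import agate

table = agate.Table.from_csv('input.csv')  # infers column types by default
table.to_csv('output.csv')
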
# csvpy: load the input CSV and drop into an interactive shell.
if self.input_file == sys.stdin:
    self.argparser.error('csvpy cannot accept input as piped data via STDIN.')

# Attempt to read the filename; this makes the lazy loader access the file
# and raise an error if it does not exist.
filename = self.input_file.name

if self.args.as_dict:
    klass = agate.csv.DictReader
    class_name = 'agate.csv.DictReader'
    variable_name = 'reader'
elif self.args.as_agate:
    klass = agate.Table.from_csv
    class_name = 'agate.Table'
    variable_name = 'table'
else:
    klass = agate.csv.reader
    class_name = 'agate.csv.reader'
    variable_name = 'reader'

variable = klass(self.input_file, **self.reader_kwargs)

welcome_message = 'Welcome! "%s" has been loaded in an %s object named "%s".' % (filename, class_name, variable_name)

try:
    # Note: this import path is from old IPython releases; modern IPython
    # exposes InteractiveShellEmbed under IPython.terminal.embed.
    from IPython.frontend.terminal.embed import InteractiveShellEmbed
    exec('%s = variable' % variable_name)
    ipy = InteractiveShellEmbed(banner1=welcome_message)
    ipy()
except ImportError:
    # Fall back to the stdlib REPL if IPython is not installed.
    import code
    code.interact(welcome_message, local={variable_name: variable})
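
# Example (sketch): a hedged illustration of the stdlib fallback branch above,
# loading a CSV with agate.csv.reader and handing it to code.interact.
# 'data.csv' is a hypothetical file name.
import code

import agate

with open('data.csv') as f:
    reader = agate.csv.reader(f)
    code.interact('"data.csv" is loaded in a reader object named "reader".',
                  local={'reader': reader})
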
# csvclean: write checked rows to <base>_out.csv and log any malformed rows
# to <base>_err.csv. `reader` and `RowChecker` are set up earlier in csvclean;
# `splitext` is os.path.splitext.
base = splitext(self.input_file.name)[0]

with open('%s_out.csv' % base, 'w') as f:
    clean_writer = agate.csv.writer(f, **self.writer_kwargs)

    checker = RowChecker(reader)
    clean_writer.writerow(checker.column_names)

    for row in checker.checked_rows():
        clean_writer.writerow(row)

if checker.errors:
    error_filename = '%s_err.csv' % base

    with open(error_filename, 'w') as f:
        error_writer = agate.csv.writer(f, **self.writer_kwargs)

        error_header = ['line_number', 'msg']
        error_header.extend(checker.column_names)
        error_writer.writerow(error_header)

        error_count = len(checker.errors)

        for e in checker.errors:
            error_writer.writerow(self._format_error_row(e))

    self.output_file.write('%i error%s logged to %s\n' % (error_count, '' if error_count == 1 else 's', error_filename))
else:
    self.output_file.write('No errors.\n')

if checker.joins:
    self.output_file.write('%i rows were joined/reduced to %i rows after eliminating expected internal line breaks.\n' % (checker.rows_joined, checker.joins))
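
# Example (sketch): RowChecker also repairs rows split by internal line breaks;
# the hedged sketch below reproduces only its simplest check (column counts)
# using just the stdlib. The function name and file paths are hypothetical.
import csv

def split_clean_and_errors(in_path, out_path, err_path):
    """Copy well-formed rows to out_path; log short/long rows to err_path."""
    with open(in_path, newline='') as f_in, \
            open(out_path, 'w', newline='') as f_out, \
            open(err_path, 'w', newline='') as f_err:
        reader = csv.reader(f_in)
        clean_writer = csv.writer(f_out)
        error_writer = csv.writer(f_err)

        header = next(reader)
        clean_writer.writerow(header)
        error_writer.writerow(['line_number', 'msg'] + header)

        # Data starts on line 2: line 1 was consumed as the header.
        for line_number, row in enumerate(reader, start=2):
            if len(row) == len(header):
                clean_writer.writerow(row)
            else:
                msg = 'Expected %d columns, found %d' % (len(header), len(row))
                error_writer.writerow([line_number, msg] + row)
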
# Fixed-width schema parser: read field definitions from a schema CSV.
def __init__(self, schema):
    self.fields = []  # A list of FixedWidthFields

    schema_reader = agate.csv.reader(schema)
    schema_decoder = SchemaDecoder(next(schema_reader))

    for i, row in enumerate(schema_reader):
        try:
            self.fields.append(schema_decoder(row))
        except Exception as e:
            # i is zero-based and the header consumed line 1, hence i + 2.
            raise ValueError("Error reading schema at line %i: %s" % (i + 2, e))
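
# Example (sketch): a hedged usage sketch for the schema parser above, via
# csvkit's fixed2csv helper (the same function the in2csv snippet calls; the
# exact import path is an assumption). The schema is a CSV with
# column/start/length headers; the sample data below is made up.
import io

from csvkit.convert.fixed import fixed2csv

schema = io.StringIO('column,start,length\nname,0,10\nage,10,3\n')
data = io.StringIO('Alice      31\nBob        42\n')

out = io.StringIO()
fixed2csv(data, schema, output=out)
print(out.getvalue())  # name,age / Alice,31 / Bob,42
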
    to be parsed, type inferred, etc. However, their order/index property will reflect the
    original data (e.g. column 8 will still be "order" 7, even if it's the third column
    in the resulting Table).
    """
    # This bit of nonsense is to deal with "files" from stdin,
    # which are not seekable and thus must be buffered.
    contents = f.read()

    # sniff_limit == 0 means do not sniff.
    if sniff_limit is None:
        kwargs['dialect'] = sniff_dialect(contents)
    elif sniff_limit > 0:
        kwargs['dialect'] = sniff_dialect(contents[:sniff_limit])

    f = six.StringIO(contents)
    rows = agate.csv.reader(f, **kwargs)

    try:
        if no_header_row:
            # Peek at a row to infer column names from, and put it back on top.
            row = next(rows)
            rows = itertools.chain([row], rows)
            headers = make_default_headers(len(row))
        else:
            headers = next(rows)
    except StopIteration:
        # The file is empty (e.g. `/dev/null`).
        headers = []

    if no_header_row or column_ids:
        column_ids = parse_column_identifiers(column_ids, headers, column_offset)
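
# Example (sketch): sniff_dialect above is csvkit's helper; the stdlib
# csv.Sniffer illustrates the same idea, including capping the sniffed
# sample the way sniff_limit does. The sample data is made up.
import csv
import io

sample = 'a;b;c\n1;2;3\n4;5;6\n'
dialect = csv.Sniffer().sniff(sample[:1024])  # sniff at most 1024 characters
rows = list(csv.reader(io.StringIO(sample), dialect=dialect))
print(rows)  # [['a', 'b', 'c'], ['1', '2', '3'], ['4', '5', '6']]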