How to use the csvkit.reader function in csvkit

To help you get started, we’ve selected a few csvkit examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github wireservice / csvkit / tests / test_py3.py View on Github external
def test_writer_alias(self):
        """Write three rows with csvkit.writer and read them back with csvkit.reader.

        The last row contains a non-ASCII character so the round trip
        exercises more than plain ASCII text.
        """
        rows = [
            ['a', 'b', 'c'],
            ['1', '2', '3'],
            ['4', '5', u'ʤ'],
        ]

        # Write phase: serialize every row into an in-memory buffer.
        buf = six.StringIO()
        csv_out = csvkit.writer(buf)
        for row in rows:
            csv_out.writerow(row)

        # Read phase: parse a fresh buffer holding the serialized text.
        csv_in = csvkit.reader(six.StringIO(buf.getvalue()))
        for expected in rows:
            self.assertEqual(next(csv_in), expected)
github wireservice / csvkit / tests / test_py2.py View on Github external
def test_writer_alias(self):
        """Round-trip three rows through csvkit.writer / csvkit.reader as UTF-8.

        Also checks that a writer created with ``encoding='utf-8'`` reports
        ``_eight_bit`` as True before any row is written.
        """
        rows = [
            ['a', 'b', 'c'],
            ['1', '2', '3'],
            ['4', '5', u'ʤ'],
        ]

        buf = six.StringIO()
        csv_out = csvkit.writer(buf, encoding='utf-8')
        self.assertEqual(csv_out._eight_bit, True)
        for row in rows:
            csv_out.writerow(row)

        # Parse a fresh buffer holding exactly what the writer produced.
        csv_in = csvkit.reader(six.StringIO(buf.getvalue()), encoding='utf-8')
        for expected in rows:
            self.assertEqual(next(csv_in), expected)
github cirlabs / django-project-template / lib / management / commands / csv_to_model.py View on Github external
def handle(self, *args, **options):
        """Print a Django model class skeleton derived from a CSV header row.

        ``args[0]`` is the path of the CSV file to read. Each value in the
        file's first row is passed through ``slugify``, has its hyphens
        replaced with underscores, and is emitted as a
        ``models.CharField(max_length=255)`` attribute of a class named
        ``GeneratedModel``. All output goes to stdout via ``print``.

        Raises:
            IndexError: if no positional argument was supplied.
            StopIteration: if the CSV file has no rows at all.
        """
        # read in CSV
        print("This is an auto-generated Django model module \
            created by apps.core.commands.")
        print("from django.contrib.gis.db import models\n")

        with open(args[0], 'rb') as csvfile:

            reader = csvkit.reader(csvfile)
            # Use the builtin next() rather than the Python 2-only
            # ``.next()`` method so the iterator protocol works on both
            # Python 2 and Python 3.
            headers = next(reader)
            print("class GeneratedModel(models.Model):")

            for row in headers:
                # take the row, slugify it
                # and replace the hyphens with underscores
                field = slugify(row).replace('-', '_')
                print("    %s = models.CharField(max_length=255)" % field)

            print("\n")
github california-civic-data-coalition / django-calaccess-raw-data / calaccess_raw / management / commands / cleancalaccessrawfile.py View on Github external
def get_headers(self):
        """
        Return the first row of the TSV file at ``self.tsv_path``.

        An empty list is returned when the file contains no rows.
        """
        with open(self.tsv_path, "r") as source:
            rows = csvkit.reader(source, delimiter=str("\t"))
            # The default argument stands in for catching StopIteration
            # when the reader is already exhausted.
            return next(rows, [])
github CartoDB / cartodb / lib / importer / misc / csv_remove_newlines.py View on Github external
#!/usr/bin/env python

# Remove newline chars from CSV "cells"
# Input is taken from stdin and output spit to stdout

import csvkit
import sys

# Stream rows from stdin to stdout, dropping every newline character found
# inside a str cell; cells of any other type are passed through untouched.
reader = csvkit.reader(sys.stdin)
writer = csvkit.writer(sys.stdout)
for row in reader:
  for i, cell in enumerate(row):
    if isinstance(cell, str) and "\n" in cell:
      row[i] = cell.replace("\n", '')
  writer.writerow(row)
github california-civic-data-coalition / django-calaccess-raw-data / calaccess_raw / management / commands / loadcalaccessrawfile.py View on Github external
'cleancalaccessrawfile {0}`).'.format(self.model._meta.db_table)
            )

        # Get the row count from the source CSV
        with open(self.csv, 'r') as infile:
            self.csv_row_count = max(sum(1 for line in infile) - 1, 0)

        # Quit if the CSV is empty.
        if not self.csv_row_count:
            if self.verbosity > 2:
                self.failure("{} is empty.".format(self.csv))
            return

        # Get the headers from the source CSV
        with open(self.csv, 'r') as infile:
            csv_reader = reader(infile)
            self.csv_headers = next(csv_reader)

        # store the start time for the load
        raw_file.load_start_datetime = now()
        # reset the finish time for the load
        raw_file.load_finish_datetime = None
        # save here in case command doesn't finish
        raw_file.save()

        # Load table
        if self.verbosity > 2:
            self.log(" Loading {}".format(options['model_name']))
        self.load()

        # add load counts to raw_file_record
        raw_file.load_columns_count = len(self.model._meta.fields)