How to use the csvkit.CSVKitReader function in csvkit

To help you get started, we’ve selected a few csvkit examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github wireservice / csvkit / tests / test_py2.py View on Github external
def test_utf8(self):
        """Read the UTF-8 example file and compare each row with its expected value."""
        expected_rows = (
            ['a', 'b', 'c'],
            ['1', '2', '3'],
            ['4', '5', u'ʤ'],
        )
        with open('examples/test_utf8.csv') as handle:
            rows = csvkit.CSVKitReader(handle, encoding='utf-8')
            # Rows are pulled one at a time so a mismatch fails on the first bad row.
            for expected in expected_rows:
                self.assertEqual(next(rows), expected)
github caciviclab / disclosure-backend / calaccess_raw / management / commands / loadcalaccessrawfile.py View on Github external
def get_headers(self, csv_path):
        """
        Return the first row of the CSV file found at ``csv_path``.

        The file is opened in text mode, wrapped in a ``CSVKitReader`` and
        closed again when the ``with`` block exits.
        """
        with open(csv_path, 'r') as source:
            # Only the first row is consumed; the rest of the file is left unread.
            return next(CSVKitReader(source))
github california-civic-data-coalition / django-calaccess-raw-data / calaccess / clean_data.py View on Github external
def car_wash(clean_data, file_name):
    """
    Write tab-delimited ``clean_data`` to a fully quoted CSV file.

    The output file is named ``file_name`` lower-cased with a ``.csv``
    suffix and is created inside the module-level ``clean_data_dir``.
    Each input line is decoded as ASCII (undecodable bytes are replaced),
    re-encoded as UTF-8, parsed as a single tab-delimited record and written
    out with every field quoted.

    Both the output file and the in-memory input buffer are closed even if
    decoding or parsing a line raises.
    """
    new_csv_name = file_name.lower() + '.csv'
    new_csv_path = os.path.join(clean_data_dir, new_csv_name)
    # The context manager guarantees the output file is closed on error;
    # a bare open()/close() pair would leak the handle if any row raised.
    with open(new_csv_path, 'wb') as outfile:
        writer = CSVKitWriter(outfile, quoting=csv.QUOTE_ALL)
        infile = StringIO(clean_data)
        try:
            for line in infile:
                # Normalise to UTF-8 bytes, replacing anything non-ASCII.
                utf8_line = line.decode("ascii", "replace").encode('utf-8')
                # Each line is parsed on its own, as one tab-delimited row.
                reader = CSVKitReader(StringIO(utf8_line), delimiter='\t')
                writer.writerow(next(reader))
        finally:
            infile.close()
github metagriffin / csvsed / csvsed / cli.py View on Github external
def main(self):
    """
    Stream the input CSV through ``sed.CsvFilter`` and write the result.

    The header row is read first and used to resolve ``self.args.columns``
    into column identifiers. Every resolved identifier is mapped to
    ``self.args.expr``, and that mapping is handed to ``sed.CsvFilter``
    together with the remaining rows. The header is written unchanged,
    followed by each row the filter yields.
    """
    source = CSVKitReader(self.args.file, **self.reader_kwargs)
    header = source.next()
    column_ids = parse_column_identifiers(
        self.args.columns, header, self.args.zero_based)
    modifiers = dict((column_id, self.args.expr) for column_id in column_ids)
    writer = CSVKitWriter(self.output_file, **self.writer_kwargs)
    # The header was already consumed above, hence header=False.
    filtered = sed.CsvFilter(source, modifiers, header=False)
    writer.writerow(header)
    for record in filtered:
        writer.writerow(record)
github pandaproject / panda / panda / tasks / import_csv.py View on Github external
task_status = dataset.current_task
        task_status.begin(ugettext('Preparing to import'))

        line_count = self._count_lines(upload.get_path())

        if self.is_aborted():
            task_status.abort('Aborted during preperation')

            log.warning('Import aborted, dataset_slug: %s' % dataset_slug)

            return

        f = open(upload.get_path(), 'r')

        reader = CSVKitReader(f, encoding=upload.encoding, **upload.dialect_as_parameters())
        reader.next()

        add_buffer = []
        data_typer = DataTyper(dataset.column_schema)
        throttle = config_value('PERF', 'TASK_THROTTLE')

        i = 0

        while True:
            # The row number which is about to be read, for error handling and indexing
            i += 1

            try:
                row = reader.next()
            except StopIteration:
                i -= 1
github pandaproject / panda / panda / utils / csvdata.py View on Github external
def guess_column_types(path, dialect, sample_size, encoding='utf-8'):
    """
    Guess column types based on a sample of data.

    Opens ``path``, reads the header row, and passes up to ``sample_size``
    of the following rows to ``normalize_table``. The types it reports are
    collected by name in ``type_names``, with ``None`` standing in for
    ``NoneType``.

    ``dialect`` is unpacked as keyword arguments to ``CSVKitReader``, and
    ``encoding`` is forwarded to it as well.

    NOTE(review): no ``return`` statement is visible in this excerpt, so the
    function as shown returns ``None``. Presumably ``type_names`` is meant
    to be returned -- confirm against the full source.
    """
    with open(path, 'r') as f:
        reader = CSVKitReader(f, encoding=encoding, **dialect)
        # First row is treated as the header; only its length is used below.
        headers = reader.next()

        # islice is lazy: at most sample_size rows are pulled from the reader.
        sample = islice(reader, sample_size)
        # normal_values is not used in the visible part of this function.
        normal_types, normal_values = normalize_table(sample)

        type_names = []

        for t in normal_types:
            if t is NoneType:
                type_names.append(None)
            else:
                type_names.append(t.__name__)

        # If a final column had no values csvkit will have dropped it
        # Pad with None so there is one entry per header column.
        while len(type_names) < len(headers):
            type_names.append(None)
github pandaproject / panda / redd / utils.py View on Github external
def csv_sample_data(f, dialect, sample_size=settings.PANDA_SAMPLE_DATA_ROWS):
    """
    Return up to ``sample_size`` data rows read from the file object ``f``.

    ``dialect`` is unpacked as keyword arguments to ``CSVKitReader``. The
    first row is consumed and discarded before sampling starts.
    """
    rows = CSVKitReader(f, **dialect)
    rows.next()  # discard the header row

    # islice stops after sample_size rows, or earlier if the input runs out.
    return list(islice(rows, sample_size))
github pandaproject / panda / panda / admin.py View on Github external
user_data = request.POST.get('user-data', '') 

                if not user_data:
                    raise Exception(_('No user data provided.'))

                context['user_data'] = user_data

                try:
                    csv_dialect = csvkit_sniff(user_data)
                except UnicodeDecodeError:
                    raise Exception(_('Only UTF-8 data is supported.'))

                if not csv_dialect:
                    raise Exception(_('Unable to determine the format of the data you entered. Please ensure it is valid CSV data.'))

                reader = CSVKitReader(StringIO(user_data), dialect=csv_dialect)

                emails = 0

                for i, row in enumerate(reader):
                    if len(row) < 4:
                        raise Exception(_('Row %i has less than 4 columns.') % i)
                    if len(row) > 4:
                        raise Exception(_('Row %i has more than 4 columns.') % i)

                    if UserProxy.objects.filter(email=row[0]).count():
                        raise Exception(_('User "%s" already exists')  % row[0])

                    user = UserProxy.objects.create_user(row[0], row[0], row[1] or None)
                    user.is_active = bool(row[1]) # active if a password is provided
                    user.first_name = row[2]
                    user.last_name = row[3]