How to use the csvkit.CSVKitWriter function in csvkit

To help you get started, we’ve selected a few csvkit examples that show the most common ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github wireservice / csvkit / tests / test_py3.py View on Github external
def test_utf8(self):
        """Write three rows (one with a non-ASCII cell) and read them back unchanged."""
        rows = [
            ['a', 'b', 'c'],
            ['1', '2', '3'],
            ['4', '5', u'ʤ'],
        ]

        sink = six.StringIO()
        csv_out = csvkit.CSVKitWriter(sink)
        for row in rows:
            csv_out.writerow(row)

        # Read back from a fresh buffer holding exactly what was written
        source = six.StringIO(sink.getvalue())

        csv_in = csvkit.CSVKitReader(source)
        for expected in rows:
            self.assertEqual(next(csv_in), expected)
github metagriffin / csvsed / csvsed / sed.py View on Github external
super(E_modifier, self).__init__()
    if not spec or len(spec) < 3 or spec[0] != 'e':
      raise InvalidModifierSpec(spec)
    espec = spec.split(spec[1])
    if len(espec) != 3:
      raise InvalidModifierSpec(spec)
    espec[2] = espec[2].lower()
    self.command = espec[1]
    self.index   = 1 if 'i' in espec[2] else None
    self.csv     = 'c' in espec[2]
    if not self.csv:
      return
    self.proc = subprocess.Popen(
      self.command, shell=True, bufsize=0,
      stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    self.writer = csvkit.CSVKitWriter(self.proc.stdin)
    # note: not using csvkit's reader because there is no easy way of
    # making it not read-ahead (which breaks the "continuous" mode).
    # self.reader = csvkit.CSVKitReader(self.proc.stdout)
    # todo: fix csvkit so that it can be used in non-read-ahead mode.
    self.reader = csv.reader(ReadlineIterator(self.proc.stdout))
  def __call__(self, value):
github california-civic-data-coalition / django-calaccess-raw-data / calaccess / clean_data.py View on Github external
def car_wash(clean_data, file_name):
    """
    Re-write tab-delimited text as a fully quoted CSV file.

    Each line of `clean_data` is parsed on its own as one tab-delimited
    record and written to `<file_name lowercased>.csv` inside the
    module-level `clean_data_dir`.

    clean_data -- the tab-delimited text to convert (each line is decoded
                  as ASCII, so presumably a byte string; confirm with caller)
    file_name  -- base name of the output file; lowercased, '.csv' appended
    """
    new_csv_name = file_name.lower() + '.csv'
    new_csv_path = os.path.join(clean_data_dir, new_csv_name)
    infile = StringIO(clean_data)
    try:
        # The context manager closes the output file even if a row fails
        # to parse or write, so the handle is never leaked.
        with open(new_csv_path, 'wb') as outfile:
            writer = CSVKitWriter(outfile, quoting=csv.QUOTE_ALL)
            for line in infile:
                # Bytes outside ASCII are replaced rather than raising,
                # then the line is re-encoded as UTF-8 for the reader.
                clean_line = line.decode("ascii", "replace").encode('utf-8')
                # A fresh reader per line keeps every input line a
                # separate record.
                reader = CSVKitReader(StringIO(clean_line), delimiter='\t')
                writer.writerow(next(reader))
    finally:
        infile.close()
github pandaproject / panda / panda / tasks / export_search.py View on Github external
total_n = 0
        throttle = config_value('PERF', 'TASK_THROTTLE')

        for dataset_slug in datasets:
            try:
                dataset = Dataset.objects.get(slug=dataset_slug)
            except Dataset.DoesNotExist:
                log.warning('Skipping part of export due to Dataset being deleted, dataset_slug: %s' % dataset_slug)

                continue

            filename = '%s.csv' % dataset_slug
            file_path = os.path.join(path, filename)

            f = open(file_path, 'w')
            writer = CSVKitWriter(f)
            
            # Header
            writer.writerow([c['name'] for c in dataset.column_schema])
                
            response = solr.query(
                settings.SOLR_DATA_CORE,
                query,
                offset=0,
                limit=0
            )

            # Update dataset and total counts for progress tracking
            datasets[dataset_slug] = response['response']['numFound']
            total_count = sum(datasets.values())

            n = 0
github california-civic-data-coalition / django-calaccess-campaign-browser / example / toolbox / management / commands / exportcalaccesscampaigncandidates.py View on Github external
f.filer_id_raw,
            f.xref_filer_id,
            f.name,
            f.party
        FROM %(candidate)s as c
        INNER JOIN %(office)s as o
        ON c.office_id = o.id
        INNER JOIN %(filer)s as f
        ON c.filer_id = f.id
        """ % dict(
            candidate=models.Candidate._meta.db_table,
            office=models.Office._meta.db_table,
            filer=models.Filer._meta.db_table,
        )
        self.cursor.execute(sql)
        writer = CSVKitWriter(open("./candidates.csv", 'wb'))
        writer.writerow([
            'office_name',
            'office_seat',
            'filer_id',
            'xref_filer_id',
            'name',
            'party'
        ])
        writer.writerows(self.cursor.fetchall())
github metagriffin / csvsed / csvsed / cli.py View on Github external
def main(self):
    """Filter the selected columns through sed.CsvFilter and write every row out as CSV.

    Reads from self.args.file; the first row is taken as the column names,
    which are written to the output unmodified.
    """
    source = CSVKitReader(self.args.file, **self.reader_kwargs)
    # The first row carries the column names
    header = source.next()
    selected = parse_column_identifiers(
      self.args.columns, header, self.args.zero_based)
    # Every selected column is mapped to the same expression
    modifiers = {column: self.args.expr for column in selected}
    sink = CSVKitWriter(self.output_file, **self.writer_kwargs)
    # The header was already consumed above, hence header=False
    filtered = sed.CsvFilter(source, modifiers, header=False)
    sink.writerow(header)
    for record in filtered:
      sink.writerow(record)
github caciviclab / disclosure-backend / disclosure / management / commands / dumpuniquecontributors.py View on Github external
ORDER BY
            last_name,
            first_name,
            suffix,
            title,
            city,
            state,
            occupation,
            employer
        """ % dict(
            rcpt=models.RcptCd._meta.db_table,
            loan=models.LoanCd._meta.db_table,
            s497=models.S497Cd._meta.db_table,
        )
        self.cursor.execute(sql)
        writer = CSVKitWriter(open("./contributors.csv", 'wb'))
        writer.writerow([
            'title',
            'first_name',
            'last_name',
            'suffix',
            'occupation',
            'employer',
            'address1',
            'address2',
            'city',
            'state',
            'zipcode',
            'committee_id',
            'count'
        ])
        writer.writerows(self.cursor.fetchall())