How to use the unicodecsv.DictWriter function in unicodecsv

To help you get started, we’ve selected a few unicodecsv.DictWriter examples based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
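
Before diving into the project excerpts, here is a minimal sketch of unicodecsv.DictWriter on its own (the file name, field names, and rows are illustrative). unicodecsv mirrors the standard library csv module but handles encoding itself, so the target file is opened in binary mode and an encoding is passed to the writer.

import unicodecsv

# Illustrative field names and rows; the API mirrors the stdlib csv module.
fieldnames = ['name', 'city']
rows = [{'name': u'José', 'city': u'Köln'}]

# Open the target in binary mode; DictWriter encodes each row itself.
with open('people.csv', 'wb') as f:
    writer = unicodecsv.DictWriter(f, fieldnames=fieldnames, encoding='utf-8')
    writer.writeheader()
    writer.writerows(rows)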

github CivicSpleen / ambry / test / functional / test_schema.py
        rows = []
        with b.source_fs.open('source_schema.csv',encoding='utf8') as f:
            r = csv.reader(f)
            headers = next(r)

            for row in r:
                d = dict(zip(headers, row))
                d['dest_header'] = 'X'+d['source_header']
                rows.append(d)

        # Fails with: TypeError: must be unicode, not str
        # with b.source_fs.open('source_schema.csv', 'w',encoding='utf8') as f:

        path = b.source_fs.getsyspath('source_schema.csv')
        with open(path, 'w') as f:
            w = csv.DictWriter(f,fieldnames=headers)
            w.writeheader()
            for row in rows:
                w.writerow(row)

        b.sync_in()

        self.assertEqual([u'int', u'float', u'string', u'time', u'date'],
                         [ c.source_header for c in b.dataset.source_table('types1').columns])


        b.clean_ingested()
        b.ingest(tables=['types'])

        self.assertEqual([u'int', u'float', u'string', u'time', u'date'],
                         [ c.source_header for c in b.dataset.source_table('types1').columns])
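
The commented-out open() in this excerpt fails because the filesystem handle is text-mode and expects unicode, while unicodecsv writes encoded byte strings; reopening the file by its system path in plain binary mode avoids the TypeError. A minimal sketch of the same read-modify-write round trip, assuming a local path and binding unicodecsv to the csv name to keep it close to the excerpt:

import unicodecsv as csv

path = 'source_schema.csv'  # illustrative local path

# Read the existing schema rows into dicts.
with open(path, 'rb') as f:
    r = csv.reader(f, encoding='utf-8')
    headers = next(r)
    rows = [dict(zip(headers, row)) for row in r]

# Derive the destination header (the file is assumed to already contain a
# dest_header column, so it is part of `headers`).
for row in rows:
    row['dest_header'] = 'X' + row['source_header']

# Write back in binary mode; the writer handles the UTF-8 encoding.
with open(path, 'wb') as f:
    w = csv.DictWriter(f, fieldnames=headers, encoding='utf-8')
    w.writeheader()
    w.writerows(rows)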
github openaddresses / openaddresses / scripts / es / gml_to_csv.py
            lookup_key = self.strip_hash.sub('', attrs['xlink:href'])

            if self.lookup['thoroughfare'].get(lookup_key) is not None:
                self.object['street'] = self.lookup['thoroughfare'].get(lookup_key)
            elif self.lookup['admin'].get(lookup_key) is not None:
                self.object['admin'] = self.lookup['admin'].get(lookup_key)
            elif self.lookup['postal'].get(lookup_key) is not None:
                self.object['postcode'] = self.lookup['postal'].get(lookup_key)

        # detect SRS, create CSV writer if necessary
        if name == 'gml:Point':
            self.srs = attrs.get('srsName', None)
            if self.srs is not None:
                self.srs = self.srs.split(':')[-1]
                if self.srs not in self.writers:
                    self.writers[self.srs] = csv.DictWriter(open(self.out_dir + 'es-%s.csv' % self.srs, 'a'), ('lon', 'lat', 'number', 'street', 'postcode', 'admin'))
                    self.writers[self.srs].writeheader()
github occrp-attic / ingestors / ingestors / tabular / messy.py
    def generate_csv(self, sheet, row_set, temp_dir):
        out_path = safe_filename(row_set.name, extension='csv')
        out_path = join_path(temp_dir, out_path)
        offset, headers = headers_guess(row_set.sample)
        row_set.register_processor(headers_processor(headers))
        row_set.register_processor(offset_processor(offset + 1))
        with open(out_path, 'w') as fh:
            writer = None
            for row in row_set:
                try:
                    if writer is None:
                        writer = DictWriter(fh, [c.column for c in row])
                        writer.writeheader()
                    data = {c.column: string_value(c.value) for c in row}
                    writer.writerow(data)
                except Exception as ex:
                    log.exception(ex)

        child_id = join_path(self.result.id, row_set.name)
        self.manager.handle_child(self.result, out_path,
                                  id=child_id,
                                  title=row_set.name,
                                  mime_type='text/csv')
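
Deferring construction of the DictWriter until the first row arrives, as this excerpt does, is useful when the field names come from the data itself. A minimal sketch of that pattern with illustrative row dictionaries:

import unicodecsv

def write_rows(out_path, rows):
    # `rows` is an iterable of dicts; field names come from the first row seen.
    with open(out_path, 'wb') as fh:
        writer = None
        for row in rows:
            if writer is None:
                writer = unicodecsv.DictWriter(fh, fieldnames=list(row.keys()),
                                               encoding='utf-8')
                writer.writeheader()
            writer.writerow(row)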
github mgaitan / preciosa / preciosa / datos / management / commands / sucursales_osm.py
)

        geojson = args[0]
        if geojson.startswith('http'):
            fh = urllib2.urlopen(geojson)
        else:
            fh = open(args[0])
        self.data = json.load(fh)

        suc_dir = os.path.join(settings.DATASETS_ROOT, 'sucursales')
        if not os.path.exists(suc_dir):
            os.makedirs(suc_dir)

        FILENAME = self.FILENAME % datetime.now().strftime("%Y-%m-%d-%H%M%S")
        FILENAME = os.path.join(suc_dir, FILENAME)
        writer = unicodecsv.DictWriter(open(FILENAME, 'wb'),
                                       fieldnames=self.get_columnas())
        writer.writeheader()
        bar = Bar('Convirtiendo ', suffix='%(percent)d%%')
        for feature in bar.iter(self.entrada()):
            sucursal = self.parse_sucursal(feature)
            writer.writerow(sucursal)
github SEL-Columbia / formhub / utils / bamboo.py
            raise NoRecordsFoundError
        else:
            # we should only do it for count == 1 but eh.

            csv_buf = getbuff()

            if only_last:
                pifilter = [pifilter[0]]

            rows = [pi.to_dict_for_mongo() for pi in pifilter]

            if headers_to_use is None:
                headers_to_use = [key for key in rows[0].keys()
                                  if not key.startswith('_')]

            w = unicodecsv.DictWriter(csv_buf, fieldnames=headers_to_use,
                                      extrasaction='ignore',
                                      lineterminator='\n',
                                      encoding='utf-8')
            if with_header:
                w.writeheader()
            w.writerows(rows)
            csv_buf.flush()

            if not csv_buf.len:
                raise NoRecordsFoundError

            return csv_buf.getvalue()
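
Because unicodecsv emits encoded bytes, the writer can target an in-memory buffer just as easily as a file. A sketch of the buffer pattern above, with io.BytesIO standing in for the project's getbuff() helper and illustrative records:

import io
import unicodecsv

rows = [{u'name': u'André', u'_id': u'123'}]           # illustrative records
headers_to_use = [key for key in rows[0].keys()
                  if not key.startswith('_')]          # drop internal keys

csv_buf = io.BytesIO()
w = unicodecsv.DictWriter(csv_buf, fieldnames=headers_to_use,
                          extrasaction='ignore',       # skip keys not in fieldnames
                          lineterminator='\n',
                          encoding='utf-8')
w.writeheader()
w.writerows(rows)

csv_bytes = csv_buf.getvalue()  # the document as UTF-8 encoded bytes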
github PaloAltoNetworks / minemeld-core / minemeld / flask / feedredis.py
    # check if bom should be generated
    ubom = kwargs.pop('ubom', None)
    if ubom is None:
        ubom = False
    else:
        ubom = int(ubom[0])

    cstart = start

    if ubom:
        LOG.debug('BOM')
        yield '\xef\xbb\xbf'

    with _buffer() as current_line:
        w = unicodecsv.DictWriter(
            current_line,
            fieldnames=columns,
            encoding='utf-8'
        )

        if header:
            w.writeheader()
            yield current_line.getvalue()

        while cstart < (start + num):
            ilist = zrange(feed, cstart,
                           cstart - 1 + min(start + num - cstart, FEED_INTERVAL))

            for indicator in ilist:
                v = SR.hget(feed + '.value', indicator)
                v = None if v is None else json.loads(v)
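
This feed streams its output rather than materialising it: each line is written into a small buffer whose contents are yielded to the response. A sketch of that shape, with a plain io.BytesIO standing in for the module's _buffer() helper and illustrative indicator dicts:

import io
import unicodecsv

def stream_csv(indicators, columns, header=True):
    """Yield a UTF-8 CSV document one line at a time (illustrative sketch)."""
    current_line = io.BytesIO()
    w = unicodecsv.DictWriter(current_line, fieldnames=columns,
                              extrasaction='ignore', encoding='utf-8')

    if header:
        w.writeheader()
        yield current_line.getvalue()

    for indicator in indicators:
        # Reset the buffer, write one row, and hand its bytes to the caller.
        current_line.seek(0)
        current_line.truncate()
        w.writerow(indicator)
        yield current_line.getvalue()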
github openelections / openelections-core / openelex / us / ar / datasource.py
            if subjurisdiction.url not in self.no_precinct_urls:
                url_paths.append({
                    'date': election['start_date'],
                    'office': '',
                    'race_type': election['race_type'],
                    'party': '',
                    'special': election['special'],
                    'url': subjurisdiction.report_url(fmt),
                    'reporting_level': 'precinct',
                    'jurisdiction': subjurisdiction.name,
                })

        with open(url_paths_filename, 'wb') as f:
            fieldnames = ['date', 'office', 'race_type', 'party',
                'special', 'url', 'reporting_level', 'jurisdiction']
            writer = unicodecsv.DictWriter(f, fieldnames)
            writer.writeheader()
            writer.writerows(url_paths)

        return url_paths
github censusreporter / census-table-metadata / process_merge.py
        'denominator_column_id',
        'topics'
    ]
    table_csv = unicodecsv.DictWriter(table_file, table_metadata_fieldnames)
    table_csv.writeheader()

    with open("%s/census_column_metadata.csv" % root_dir, 'wb') as column_file:
        column_metadata_fieldnames = [
            'table_id',
            'line_number',
            'column_id',
            'column_title',
            'indent',
            'parent_column_id'
        ]
        column_csv = unicodecsv.DictWriter(column_file, column_metadata_fieldnames)
        column_csv.writeheader()

        for table_id, table in sorted(tables.items()):
            if not table:
                # don't write out a table that was marked to be skipped on purpose
                continue

            columns = table.pop('columns')
            table_csv.writerow(table)
            for column in sorted(columns, key=lambda a: a['column_id']):
                column_csv.writerow(column)
github ourresearch / oadoi / bigquery_import.py
    dataset_ref = client.dataset(dataset_id)
    table_ref = dataset_ref.table(table_id)
    table = client.get_table(table_ref)
    fieldnames = [schema.name for schema in table.schema]

    query = ('SELECT * FROM `unpaywall-bhd.{}` '.format(bq_tablename))
    query_job = client.query(
        query,
        # Location must match that of the dataset(s) referenced in the query.
        location='US')  # API request - starts the query

    rows = list(query_job)

    with open(temp_data_filename, 'wb') as f:
        # delimiter workaround from https://stackoverflow.com/questions/43048618/csv-reader-refuses-tab-delimiter?noredirect=1&lq=1#comment73182042_43048618
        writer = unicodecsv.DictWriter(f, fieldnames=fieldnames, delimiter=str(u'\t').encode('utf-8'))
        if header:
            writer.writeheader()
        for row in rows:
            writer.writerow(dict(zip(fieldnames, row)))

    print('Saved {} rows from {}.'.format(len(rows), bq_tablename))
    return fieldnames
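
As the comment in that last excerpt notes, the Python 2 csv machinery expects the delimiter as a plain one-character byte string, which is what str(u'\t').encode('utf-8') produces there; any dialect keyword accepted by the standard csv module can be passed through to unicodecsv.DictWriter the same way. A minimal tab-separated sketch with illustrative field names and data:

import unicodecsv

with open('export.tsv', 'wb') as f:
    writer = unicodecsv.DictWriter(f, fieldnames=['doi', 'title'],
                                   delimiter='\t', encoding='utf-8')
    writer.writeheader()
    writer.writerow({'doi': '10.1234/example', 'title': u'An example record'})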