How to use the csvkit.DictWriter function in csvkit

To help you get started, we’ve selected a few csvkit examples based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github wireservice / csvkit / tests / test_py2.py View on Github external
def test_writer_alias(self):
        # csvkit.DictWriter should act as a drop-in alias for csv.DictWriter:
        # same constructor signature, same header/row output.
        fieldnames = ['a', 'b', 'c']
        writer = csvkit.DictWriter(self.output, fieldnames)
        writer.writeheader()

        row = {u'a': u'1', u'b': u'2', u'c': u'☃'}
        writer.writerow(row)

        self.assertEqual(self.output.getvalue(), 'a,b,c\n1,2,☃\n')
github wireservice / csvkit / tests / test_py3.py View on Github external
def test_writer_alias(self):
        """The csvkit.DictWriter alias must emit standard CSV output."""
        writer = csvkit.DictWriter(self.output, ['a', 'b', 'c'])
        writer.writeheader()
        # Key order in the dict is irrelevant: DictWriter orders columns
        # by the fieldnames passed to the constructor.
        writer.writerow({u'c': u'☃', u'b': u'2', u'a': u'1'})

        result = self.output.getvalue()
        self.assertEqual(result, 'a,b,c\n1,2,☃\n')
github medialab / ricardo_data / database_scripts / custom_exports.py View on Github external
RICentities[partner[0]]['names in source (partner)'] = "; ".join(set(partner[1].split('|')))
    RICentities[partner[0]]['sources (partner)'] = "; ".join(set(partner[2].split('|')))
    RICentities[partner[0]]['bilateral periods (partner)'] = ','.join('-'.join(str(e) for e in p) for p in reduce_years_list_into_periods(partner[3].split(',')))
    RICentities[partner[0]]['nb flows (partner)'] = partner[4]
    if 'total nb flows' in RICentities[partner[0]]:
      RICentities[partner[0]]['total nb flows'] += partner[4]
    else:
      RICentities[partner[0]]['total nb flows'] = partner[4]

  with open(output_filename, "w") as f :
    hs = ['RICname', 'RICtype', 'continent', 'COW code', 'total nb flows',
    'nb flows (reporting)', 'nb flows (partner)',
    'names in source (reporting)', 'names in source (partner)',
    'bilateral periods (reporting)', 'bilateral periods (partner)',
    'sources (reporting)', 'sources (partner)'] 
    dw = csvkit.DictWriter(f, fieldnames= hs )
    dw.writeheader()
    dw.writerows(sorted((r for r in RICentities.values() if 'total nb flows' in r),key =lambda r:-1*r['total nb flows']))
    return 0
  return 1
github medialab / ricardo_data / update_data_scripts / 20170926_new_sources_format / import_new_sources_format_in_csv_data.py View on Github external
# output the new sources file
    with open('sources.csv', 'w') as of:
        output = csvkit.DictWriter(of, sources[0].keys())
        output.writeheader()
        output.writerows(sources)
    

# delete source_types.csv (by hand through git)
    # patch flows and exchange_rates through csvkit directly on csv
    # check for missing sources on the way
    missingSources = set()
    with open('../../csv_data/flows.csv', 'r') as f:
        with open('../../csv_data/new_flows.csv','w') as nf:
            flows = csvkit.DictReader(f)
            newFlows = csvkit.DictWriter(nf, flows.fieldnames)
            newFlows.writeheader()
            for flow in flows:
                if flow['source'] in swapSources:
                    flow['source'] = swapSources[flow['source']]
                else :
                    missingSources.add(flow['source'])
                newFlows.writerow(flow)
 

    with open('../../csv_data/exchange_rates.csv', 'r') as f:
        with open('../../csv_data/new_exchange_rates.csv','w') as nf:
            rates = csvkit.DictReader(f)
            newRates = csvkit.DictWriter(nf, rates.fieldnames)
            newRates.writeheader()
            for rate in rates:
                if rate['source'] in swapSources:
github medialab / ricardo_data / update_data_scripts / 20170926_new_sources_format / import_new_sources_format_in_csv_data.py View on Github external
del source[unneededColumn]

    #vérifier l'unicité des slug
    print "%s : nb de sources"%len(sources)
    uniquSlugsInSwap = set(swapSources.values())
    print "%s nombre de slugs uniques in swap"%len(uniquSlugsInSwap)
    uniquSlugsInSource = set(source['slug'] for source in sources) 
    print "%s nombre de slugs uniques in sources"%len(uniquSlugsInSource)
    print "in swap not in source :"
    print "\n".join([slug.encode('utf8') for slug in uniquSlugsInSwap - uniquSlugsInSource])
    print "\n".join([slug.encode('utf8') for slug,ss in itertools.groupby(sources, lambda s: s['slug']) if len(list(ss))>1])


    # output the new sources file
    with open('sources.csv', 'w') as of:
        output = csvkit.DictWriter(of, sources[0].keys())
        output.writeheader()
        output.writerows(sources)
    

# delete source_types.csv (by hand through git)
    # patch flows and exchange_rates through csvkit directly on csv
    # check for missing sources on the way
    missingSources = set()
    with open('../../csv_data/flows.csv', 'r') as f:
        with open('../../csv_data/new_flows.csv','w') as nf:
            flows = csvkit.DictReader(f)
            newFlows = csvkit.DictWriter(nf, flows.fieldnames)
            newFlows.writeheader()
            for flow in flows:
                if flow['source'] in swapSources:
                    flow['source'] = swapSources[flow['source']]
github openva / crump / file_extractor.py View on Github external
json_file.seek(-1, os.SEEK_END)
                        json_file.truncate()
                        json_file.write(']')
                    # new file - grab proper field map and do file setup
                    current_map = field_maps[file_number]["map"]
                    current_name = field_maps[file_number]["name"]
                    csv_name = current_name.replace(".yaml", ".csv")
                    json_name = current_name.replace(".yaml", ".json")
                    last_file = file_number
                    csv_file = open("output/"+csv_name, 'wb')
                    json_file = open("output/"+json_name, 'wb')
                    field_names = []
                    for field in current_map:
                        field_names.append(field["name"])
                    field_tuple = tuple(field for field in field_names)
                    csv_writer = csvkit.DictWriter(csv_file, field_tuple)
                    
                    csv_writer.writeheader()
                    print "Creating",csv_name.replace(".csv","")
                    # Start a new JSON file with an opening bracket.
                    json_file.write('[')
                # break the line out into pieces
                line = {}
                for field in current_map:
                    start = int(field["start"])
                    length = int(field["length"])
                    name = field["name"]
                    end = start + length
                    line[name] = current_line[1][start:end].strip()
                    if "corp-id" in name:
                        corp_id = line[name]
                try:
github medialab / ricardo_data / database_scripts / custom_exports.py View on Github external
years = [str(y) for y in range(min_year, max_year+1)]
  
  nb_entities_in_ft_and_ricardo = dict((y,0) for y in years)
  nb_entities_in_ricardo_not_in_ft = dict((y,0) for y in years)
  
  for r in RICentities.values():
    for y in years:
      if y in r and 'ft' in r[y]:
        nb_entities_in_ft_and_ricardo[y] += 1
      elif y in r:
        nb_entities_in_ricardo_not_in_ft[y] += 1

  
  with open(output_filename, "w") as f :
    hs = ['RICname', 'type', 'continent', 'COW_code', 'nb_flows_as_reporting', 'nb_flows_as_partner'] + [y for y in years] 
    dw = csvkit.DictWriter(f, fieldnames= hs )
    ft_reportings_by_year['nb_flows_as_partner'] = 'nb FT reportings'
    nb_entities_in_ft_and_ricardo['nb_flows_as_partner']= 'nb in FT & RIC'
    nb_entities_in_ricardo_not_in_ft['nb_flows_as_partner']= 'nb in RIC not in FT'
    dw.writeheader()
    dw.writerow(ft_reportings_by_year)
    dw.writerow(nb_entities_in_ft_and_ricardo)
    dw.writerow(nb_entities_in_ricardo_not_in_ft)
    dw.writerows(sorted((r for r in RICentities.values()),key =lambda r:-1*(r['nb_flows_as_reporting']+r['nb_flows_as_partner'])))
    return 0
  return 1
github medialab / ricardo_data / update_data_scripts / 20170926_new_sources_format / import_new_sources_format_in_csv_data.py View on Github external
with open('../../csv_data/new_flows.csv','w') as nf:
            flows = csvkit.DictReader(f)
            newFlows = csvkit.DictWriter(nf, flows.fieldnames)
            newFlows.writeheader()
            for flow in flows:
                if flow['source'] in swapSources:
                    flow['source'] = swapSources[flow['source']]
                else :
                    missingSources.add(flow['source'])
                newFlows.writerow(flow)
 

    with open('../../csv_data/exchange_rates.csv', 'r') as f:
        with open('../../csv_data/new_exchange_rates.csv','w') as nf:
            rates = csvkit.DictReader(f)
            newRates = csvkit.DictWriter(nf, rates.fieldnames)
            newRates.writeheader()
            for rate in rates:
                if rate['source'] in swapSources:
                    rate['source'] = swapSources[rate['source']]
                else :
                    missingSources.add(rate['source'])
                newRates.writerow(rate)


    with open('missing_sources.list','w') as ms:
        csvkit.writer(ms).writerows([_] for _ in missingSources)