How to use the csvkit.DictReader function in csvkit

To help you get started, we’ve selected a few csvkit examples that show popular ways csvkit.DictReader is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github wireservice / csvkit / tests / test_py2.py View on Github external
def test_reader_alias(self):
        """The first row read through csvkit.DictReader maps a, b, c to '1', '2', '3'."""
        expected_row = {u'a': u'1', u'b': u'2', u'c': u'3'}

        # Only the first record of the fixture file is pulled and checked.
        first_row = next(csvkit.DictReader(self.f))

        self.assertEqual(first_row, expected_row)
github waldoj / frostline / frostline.py View on Github external
cursor.execute("CREATE TABLE zip(zipcode TEXT PRIMARY KEY NOT NULL, "
            + "zone TEXT, temperatures TEXT, city TEXT, state TEXT, latitude INTEGER, longitude INTEGER)")
        db.commit()

        # Import the CSV file into the database
        with open('zipcodes.csv','rb') as zips:
            dr = csvkit.DictReader(zips)
            to_db = [(i['zipcode'], i['city'], i['state'], i['latitude'], i['longitude']) for i in dr]
        cursor.executemany("INSERT INTO zip (zipcode, city, state, latitude, longitude) VALUES (?, ?, ?, ?, ?);", to_db)
        db.commit()

    # Now load our climate data.
    zone_files = [1, 2, 3, 4]
    for zone_file in zone_files:
        with open(str(zone_file) + '.csv','rb') as zips:
            dr = csvkit.DictReader(zips)
            to_db = [(i['zone'], i['trange'], i['zipcode']) for i in dr]
        cursor.executemany("UPDATE zip SET zone=?, temperatures=? WHERE zipcode=?;", to_db)
        db.commit()
        os.remove(str(zone_file) + '.csv')

    # Close our database connection.
    db.close()
github medialab / ricardo_data / update_data_scripts / 20170926_new_sources_format / import_new_sources_format_in_csv_data.py View on Github external
import csvkit
import itertools
import re

# utilities
# Matches any single non-word character (Unicode-aware).
nonLetters = re.compile(r'\W', re.UNICODE)

def slugify(source):
    """Build the slug that identifies a source row.

    For each of the fields author, name, country, volume_date,
    volume_number and pages (in that order), the value is split on single
    spaces, non-word characters are stripped from every word, each word is
    capitalized with ``str.capitalize`` (first character upper-cased, the
    rest lower-cased) and the words are concatenated. The per-field slugs
    are then joined with ``'_'``.

    Fields whose value is falsy, or whose slug ends up empty (for example a
    value made only of punctuation), are left out of the result.

    :param source: mapping that contains every field listed above.
    :returns: the joined slug, or ``''`` when no field contributes.
    :raises KeyError: if ``source`` is a dict missing one of the fields.
    """
    fields = ['author', 'name', 'country', 'volume_date', 'volume_number', 'pages']
    parts = []
    for field in fields:
        value = source[field]
        if not value:
            continue
        # Compute the field slug once and reuse it for both the emptiness
        # check and the output.
        field_slug = ''.join(
            nonLetters.sub('', word).capitalize() for word in value.split(' ')
        )
        if field_slug:
            parts.append(field_slug)
    return '_'.join(parts)


# read 'new_sources.csv'
with open('new_sources.csv', 'r') as f:
    # Load every row up front so the rows can be mutated in the loop below.
    new_sources = list(csvkit.DictReader(f))
    
    # Maps each row's existing 'slug' value to its regenerated slug.
    swapSources = {}
    toDeleteSourcesSlugs = []

    # Regenerate the slugs; `sources` collects the rows that are kept.
    sources = []

    for source in new_sources:

        source['new_slug'] = slugify(source)
        # Record the old slug -> new slug swap so flows and currencies can be
        # updated later. This happens before the removal check, so rows marked
        # for removal are recorded too.
        swapSources[source['slug']] = slugify(source)
        # Keep only the rows whose 'put x to remove' column is empty.
        if source['put x to remove'] == '':
            sources.append(source)
github california-civic-data-coalition / django-calaccess-raw-data / calaccess_raw / annotations / __init__.py View on Github external
def load_forms():
    """
    Build the FilingForm objects described by the CSV files next to this module.

    Every row of forms.csv becomes one FilingForm (the row's columns are passed
    as keyword arguments). Every row of sections.csv is then attached, via
    add_section, to the first form whose id equals the row's form_id.

    Returns the list of forms. StopIteration propagates if a section row
    references a form_id that no loaded form has.
    """
    base_dir = os.path.dirname(__file__)

    # Forms: one FilingForm per CSV row, in file order.
    forms = []
    with open(os.path.join(base_dir, 'forms.csv'), 'r') as forms_file:
        for form_row in csvkit.DictReader(forms_file):
            forms.append(FilingForm(**form_row))

    # Sections: each row is attached to the first form with a matching id.
    with open(os.path.join(base_dir, 'sections.csv'), 'r') as sections_file:
        for section_row in csvkit.DictReader(sections_file):
            matching_forms = (
                candidate for candidate in forms
                if candidate.id == section_row['form_id']
            )
            owner = next(matching_forms)
            owner.add_section(**section_row)

    return forms
github medialab / ricardo_data / update_data_scripts / 20170926_new_sources_format / import_new_sources_format_in_csv_data.py View on Github external
# output the new sources file
    with open('sources.csv', 'w') as of:
        output = csvkit.DictWriter(of, sources[0].keys())
        output.writeheader()
        output.writerows(sources)
    

# delete source_types.csv (by hand through git)
    # patch flows and exchange_rates through csvkit directly on csv
    # check for missing sources on the way
    missingSources = set()
    with open('../../csv_data/flows.csv', 'r') as f:
        with open('../../csv_data/new_flows.csv','w') as nf:
            flows = csvkit.DictReader(f)
            newFlows = csvkit.DictWriter(nf, flows.fieldnames)
            newFlows.writeheader()
            for flow in flows:
                if flow['source'] in swapSources:
                    flow['source'] = swapSources[flow['source']]
                else :
                    missingSources.add(flow['source'])
                newFlows.writerow(flow)
 

    with open('../../csv_data/exchange_rates.csv', 'r') as f:
        with open('../../csv_data/new_exchange_rates.csv','w') as nf:
            rates = csvkit.DictReader(f)
            newRates = csvkit.DictWriter(nf, rates.fieldnames)
            newRates.writeheader()
            for rate in rates:
github california-civic-data-coalition / django-calaccess-raw-data / calaccess_raw / annotations / __init__.py View on Github external
def load_forms():
    """
    Build the FilingForm objects described by the CSV files next to this module.

    Every row of forms.csv becomes one FilingForm (the row's columns are passed
    as keyword arguments). Every row of sections.csv is then attached, via
    add_section, to the first form whose id equals the row's form_id.

    Returns the list of forms. StopIteration propagates if a section row
    references a form_id that no loaded form has.
    """
    base_dir = os.path.dirname(__file__)

    # Forms: one FilingForm per CSV row, in file order.
    forms = []
    with open(os.path.join(base_dir, 'forms.csv'), 'r') as forms_file:
        for form_row in csvkit.DictReader(forms_file):
            forms.append(FilingForm(**form_row))

    # Sections: each row is attached to the first form with a matching id.
    with open(os.path.join(base_dir, 'sections.csv'), 'r') as sections_file:
        for section_row in csvkit.DictReader(sections_file):
            matching_forms = (
                candidate for candidate in forms
                if candidate.id == section_row['form_id']
            )
            owner = next(matching_forms)
            owner.add_section(**section_row)

    return forms
github medialab / ricardo_data / update_data_scripts / 20170926_new_sources_format / import_new_sources_format_in_csv_data.py View on Github external
with open('../../csv_data/flows.csv', 'r') as f:
        with open('../../csv_data/new_flows.csv','w') as nf:
            flows = csvkit.DictReader(f)
            newFlows = csvkit.DictWriter(nf, flows.fieldnames)
            newFlows.writeheader()
            for flow in flows:
                if flow['source'] in swapSources:
                    flow['source'] = swapSources[flow['source']]
                else :
                    missingSources.add(flow['source'])
                newFlows.writerow(flow)
 

    with open('../../csv_data/exchange_rates.csv', 'r') as f:
        with open('../../csv_data/new_exchange_rates.csv','w') as nf:
            rates = csvkit.DictReader(f)
            newRates = csvkit.DictWriter(nf, rates.fieldnames)
            newRates.writeheader()
            for rate in rates:
                if rate['source'] in swapSources:
                    rate['source'] = swapSources[rate['source']]
                else :
                    missingSources.add(rate['source'])
                newRates.writerow(rate)


    with open('missing_sources.list','w') as ms:
        csvkit.writer(ms).writerows([_] for _ in missingSources)