How to use the unicodecsv.DictReader function in unicodecsv

To help you get started, we’ve selected a few unicodecsv examples based on popular ways it is used in public projects.

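Before the project examples, here is a minimal sketch of the basic pattern. The file name, encoding, and column name are placeholders:

import unicodecsv

# unicodecsv operates on byte streams, so open the file in binary mode.
with open('example.csv', 'rb') as f:
    reader = unicodecsv.DictReader(f, encoding='utf-8')
    for row in reader:
        # Each row is a dict keyed by the values in the header row.
        print(row['name'])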

github biocommons / hgvs / tests / test_hgvs_variantmapper_gcp.py
# In the source module, unicodecsv is imported under the stdlib name:
# import unicodecsv as csv
def gcp_file_reader(fn):
    rdr = csv.DictReader(open(fn, 'r'), delimiter=str('\t'))
    for rec in rdr:
        # Skip comment records, marked by a leading '#' in the id column.
        if rec['id'].startswith('#'):
            continue
        yield rec
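The str('\t') wrapper is likely there for Python 2/3 compatibility: Python 2's csv dialect parameters must be byte strings (a bare '\t' would be unicode when unicode_literals is in effect), so the cast keeps the delimiter the native str type on either version.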
github CivicSpleen / ambry / test / bundles / cde.ca.gov / api-combined / bundle.py
        try:
            # The read of `datatypes` that precedes this call is elided in
            # this excerpt.
            code_set = self.filesystem.read_yaml(
                self.config.build.codes_file.format(table_name))
        except IOError:
            # For the first run, when the field analysis hasn't yet been done.
            from collections import defaultdict
            datatypes = defaultdict(lambda: {'type': 'varchar'})
            code_set = defaultdict(lambda: [])

        with self.session as s:
            table = self.schema.add_table('api_{}'.format(group))

            with open(self.filesystem.path('meta',
                      'most_recent_fields_{}.csv'.format(group))) as f:
                reader = csv.DictReader(f)

                for row in reader:
                    # The id column becomes an integer primary key.
                    pk = row['name'] == 'id'
                    datatype = datatypes[row['name']]['type']

                    c = self.schema.add_column(
                        table, row['name'],
                        description=row['description'],
                        datatype=datatype if not pk else 'integer',
                        is_primary_key=pk,
                        data={'codes': ','.join(code_set[row['name']])
                              if row['name'] in code_set else None})
github SeldonIO / seldon-server / python / seldon / shell / import_items_utils.py
def doAttrInserts(csv_file, db):
    inserts = defaultdict(list)
    insertNum = 0
    with open(csv_file) as csvFile:
        reader = unicodecsv.DictReader(csvFile, encoding='utf-8')
        for line in reader:
            for field_name in line:
                if field_name == 'id' or field_name == 'name':
                    continue
                attr_type = available_attrs[str(field_name)][1]
                inserts[attr_type].append({'attr_name': field_name,
                                           'value': line[field_name],
                                           'id': line['id']})
                if len(inserts[attr_type]) > DB_BATCH_SIZE:
                    insertNum += 1
                    reallyDoInserts(inserts[attr_type], attr_insert_map[attr_type], insertNum, db)
                    del inserts[attr_type]
    # Flush any partially filled batches before committing.
    for insert_label in inserts:
        insertNum += 1
        reallyDoInserts(inserts[insert_label], attr_insert_map[insert_label], insertNum, db)
    db.commit()
    print('finished attribute inserts')
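The defaultdict keeps one pending batch per attribute type, so a single pass over the CSV can feed several differently shaped insert statements, and only one commit is issued at the end.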
github openelections / openelections-core / openelex / us / fl / load.py
def load(self):
        self._common_kwargs = self._build_common_election_kwargs()
        self._common_kwargs['reporting_level'] = 'precinct'
        # Store result instances for bulk loading
        results = []
        fieldnames = ['county_code', 'county_name', 'election_number',
                      'election_date', 'election_name', 'precinct_id',
                      'polling_location', 'registered_voters',
                      'registered_republicans', 'registered_democrats',
                      'registered_others', 'contest_name', 'district',
                      'contest_code', 'candidate', 'party', 'candidate_id',
                      'doe_candidate_number', 'votes']
        with self._file_handle as tsvfile:
            tsv = [x.replace('\0', '') for x in tsvfile] # remove NULL bytes
            reader = unicodecsv.DictReader(tsv, fieldnames=fieldnames, delimiter='\t')
            for row in reader:
                if self._skip_row(row):
                    continue
                results.append(self._prep_precinct_result(row))
        RawResult.objects.insert(results)
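Because DictReader accepts any iterable of lines, not only a file object, the loader can strip NUL bytes with a plain list comprehension before parsing; left in place, they would typically abort the read with a "line contains NULL byte" error.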
github alephdata / opensanctions / sources / us_bis_denied / parse.py
def usbis_parse(csvfile):
    with open(csvfile, 'r') as fh:
        for row in unicodecsv.DictReader(fh, delimiter='\t'):
            parse_row(row)
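unicodecsv's documented pattern is to open the input in binary mode ('rb') and let the reader handle decoding. Text mode, as used here, behaves the same on Python 2 (where 'r' and 'rb' are equivalent on POSIX), but binary mode is the safer choice across platforms and versions.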
github openelections / openelections-core / openelex / us / or / load.py
def load(self):
        self._common_kwargs = self._build_common_election_kwargs()
        self._common_kwargs['reporting_level'] = 'precinct'
        # Store result instances for bulk loading
        results = []

        with self._file_handle as csvfile:
            reader = unicodecsv.DictReader(csvfile, encoding='latin-1')
            next(reader, None)
            for row in reader:
                if self._skip_row(row):
                    continue
                if row['votes'] == 'X':
                    continue
                rr_kwargs = self._common_kwargs.copy()
                rr_kwargs['primary_party'] = row['party'].strip()
                rr_kwargs.update(self._build_contest_kwargs(row))
                rr_kwargs.update(self._build_candidate_kwargs(row))
                jurisdiction = row['precinct'].strip()
                county_ocd_id = [
                    c for c in self.datasource._jurisdictions()
                    if c['county'].strip().upper() == row['county'].strip().upper()
                ][0]['ocd_id']
                rr_kwargs.update({
                    'party': row['party'].strip(),
                    'jurisdiction': jurisdiction,
                    'parent_jurisdiction': row['county'],
                    # Remaining kwargs and the results.append(...) call are
                    # elided in this excerpt.
                })
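The encoding='latin-1' argument is the main convenience unicodecsv adds over the standard library: each field is decoded for you, so non-UTF-8 sources can be read without wrapping the file in a separate decoder.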
github ecds / voyages / voyages / apps / voyage / management / commands / import_AfricanNames.py
                    insert.append(c)
                    countries[key] = c
                    self.next_country_id += 1
            return c

        def get_place_id(name):
            return get_fuzzy(places, name, 'Port', True)

        def is_blank(s):
            return len(s.strip()) == 0

        updated = []
        missing_voyage_ids = []
        errors = {}
        with open(csv_path) as csvfile:
            reader = unicodecsv.DictReader(csvfile)
            count = 0
            for r in reader:
                pk = int(r['ID'])
                a = names.pop(pk, None) 
                if not a:
                    a = AfricanName()
                    a.slave_id = pk
                updated.append(a)
                # Fill fields of the record.
                count += 1
                try:
                    voyage_id = int(r['Voyage ID'])
                    a.name = r['Name']
                    a.voyage_number = voyage_id
                    a.age = None if is_blank(r['Age']) else int(float(r['Age']))
                    a.height = None if is_blank(r['Height (in)']) else float(r['Height (in)'])
                    # Remaining field assignments and the matching except
                    # clause are elided in this excerpt.
github mitodl / edx2bigquery / edx2bigquery / make_grades_persistent.py
def cleanup_rows_from_grade_persistent(csvfn, tempfn, field_to_fix="passed_timestamp"):
    """
    Removes the null values from grades_persistentcoursegrade.csv.gz.
    The function also fixes course ids by changing them from their
    edX URL format to their usual format. For instance,
    course-v1:MITx+STL.162x+2T2017 should be MITx/STL.162x/2T2017.

    This operation permanently modifies the CSV.

    :param csvfn: The path of the csv.gz to be modified
    :param tempfn: The path of the temporary csv.gz
    :type csvfn: str
    :type tempfn: str
    """
    with gzip.open(csvfn, "r") as open_csv:
        csv_dict = csv.DictReader(open_csv)
        with gzip.open(tempfn, "w+") as write_csv_file:
            write_csv = csv.DictWriter(write_csv_file, fieldnames=csv_dict.fieldnames)
            write_csv.writeheader()
            for row in csv_dict:
                row_dict = remove_nulls_from_row(row, field_to_fix)
                row_dict = fix_course_ids(row_dict)
                write_csv.writerow(row_dict)
    os.rename(tempfn, csvfn)
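The helpers remove_nulls_from_row and fix_course_ids are defined elsewhere in edx2bigquery and are not shown on this page. As a rough illustration only, a sketch consistent with the docstring might look like the following; the names come from the excerpt, but the signatures and bodies are assumptions:

def remove_nulls_from_row(row, field):
    # Hypothetical sketch: blank out a literal NULL marker in one field.
    if row.get(field) == "NULL":
        row[field] = ""
    return row

def fix_course_ids(row):
    # Hypothetical sketch: course-v1:MITx+STL.162x+2T2017 -> MITx/STL.162x/2T2017.
    course_id = row.get("course_id", "")
    if course_id.startswith("course-v1:"):
        row["course_id"] = course_id[len("course-v1:"):].replace("+", "/")
    return row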
github anki-decks / anki-deck-for-duolingo-chinese / tools / mdbg.py
def main(inDefsFileName, outFileName):
  inWordsDict = {}
  inWordsList = []
  outputWords = []

  with open(inDefsFileName, "rb") as inCsvfile:
    inWordDefs = csv.DictReader(inCsvfile)

    print("after CSV reading")

    for inWordDef in inWordDefs:

      if inWordDef["Result"] == "True" :
        outputWords.append(inWordDef)
        continue

      inWord = inWordDef['Chinese']

      rslt = getDef(inWord)

      outputWord = inWordDef
      outputWord['Result'] = False
      # Processing of `rslt` and the final CSV write-out are elided in this
      # excerpt.
github the-tale / pynames / pynames / from_tables_generator.py
def load_settings(self, settings_source):
        with file_adapter(settings_source) as settings_file:
            reader = unicodecsv.DictReader(settings_file, encoding='utf-8')
            for row in reader:
                new_native_language = row.get('native_language', '').strip()
                if new_native_language and not self.native_language:
                    self.native_language = new_native_language
                elif self.native_language and new_native_language and self.native_language != new_native_language:
                    raise exceptions.WrongCSVData(
                        'Wrong settings csv file. Native language is already set to "%(native_language)s" but new value "%(new_value)s" is present on some row',
                        native_language=self.native_language,
                        new_value=new_native_language
                    )

                new_language = row.get('languages', '').strip()
                if new_language:
                    self.languages.append(new_language)
        self.languages = set(self.languages)