def normalise_csv(infile, outfile, tagging,   # hypothetical name; the original
                  verbosity, no_comments,     # snippet starts mid-signature
                  comments, **dummy):
    try:
        # Read and skip comments at start of CSV file.
        csvcomments = ''
        if infile:
            csvFile = open(infile, 'r')
            while True:
                line = csvFile.readline()
                if line[:1] == '#':
                    csvcomments += line
                else:
                    csvfieldnames = next(unicodecsv.reader([line]))
                    break

        if not no_comments:
            logfilename = outfile.rsplit('.', 1)[0] + '.log'
            if os.path.isfile(logfilename):
                incomments = open(logfilename, 'r').read()
            else:
                incomments = ''
            logfile = open(logfilename, 'w')
            logfile.write(comments)
            logfile.write(csvcomments)
            logfile.write(incomments)
            logfile.close()

        norm = NVivoNorm(outfile)
        norm.begin()

        if tagging:
            # Compile the user-supplied tagging expression into a helper.
            exec("def evaltagging(sourceRow, csvRow):\n"
                 "    return " + tagging, globals())
def load_gaz(gaz_fn):
    # `csv` here must be unicodecsv (import unicodecsv as csv): the stdlib
    # csv.reader takes no `encoding` argument.
    template = {'GPE': [], 'LOC': [], 'ORG': [], 'PER': []}
    gaz = {
        # deepcopy, not copy: a shallow copy would share the four lists
        # across all six languages.
        'amh': copy.deepcopy(template),
        'eng': copy.deepcopy(template),
        'deu': copy.deepcopy(template),
        'orm': copy.deepcopy(template),
        'som': copy.deepcopy(template),
        'tir': copy.deepcopy(template),
    }
    with open(gaz_fn, 'rb') as f:
        reader = csv.reader(f, encoding='utf-8')
        next(reader)  # skip the header row
        for fields in reader:
            eng, lab, tir, tir_ipa, orm, orm_ipa, wik, id_, _ = fields
            # Unlabelled single-word English names default to GPE.
            if not lab:
                if len(eng.split()) == 1:
                    lab = 'GPE'
            if tir and lab:
                for v in get_variants(tir):
                    gaz['tir'][lab].append(v)
            if orm and lab:
                for v in get_variants(orm):
                    gaz['orm'][lab].append(v)
    return gaz
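
# A hedged usage sketch for load_gaz; the filename is illustrative and
# get_variants is assumed to be defined alongside the function above.
gaz = load_gaz('gazetteer.csv')
print(gaz['tir']['GPE'][:5])  # first few Tigrinya GPE variants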
rowmap = {
    # ... earlier entries truncated in the original snippet ...
    17: 'grandchild',
    18: 'childinlaw',
    19: 'student',
    20: 'member',
    21: 'correspondent',
    22: 'opposed',
    23: 'cousin',
}
tsv.seek(0)
# Skip the four header lines before the data rows.
next(tsv)
next(tsv)
next(tsv)
next(tsv)
print("Adding relationships")
for l in csv.reader(tsv, dialect="excel-tab"):
    key = l[0].encode('ascii', errors='ignore')
    p = Person().load({"key": key})
    for i, type in rowmap.items():
        if l[i]:
            for pkey in l[i].split(","):
                pkey = pkey.strip().encode('ascii', errors='ignore')
                print("{} - {}".format(key, pkey))
                if Person().load({"key": pkey}):
                    pr = PersonRelationship({
                        "type": type,
                        "from_key": key,
                        "to_key": pkey,
                    })
                    pr.save()
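
# The four consecutive next(tsv) calls above can be collapsed with
# itertools.islice; a minimal equivalent sketch:
from itertools import islice

tsv.seek(0)
list(islice(tsv, 4))  # consume the four header lines in one call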
def produce_csv():
    """Iterate over job results and prepare rows for the id table."""
    # Nested helper: `result_file`, `local_ids`, and `self` are taken
    # from the enclosing scope.
    reader = unicodecsv.reader(result_file)
    next(reader)  # skip header
    i = 0
    for row, local_id in zip(reader, local_ids):
        if row[1] == "true":  # success flag in the result row
            sf_id = row[0]
            yield "{},{}\n".format(local_id, sf_id).encode("utf-8")
        else:
            self.logger.warning(" Error on row {}: {}".format(i, row[3]))
        i += 1
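
# A hedged usage sketch for the generator above; the output filename is
# illustrative only.
with open('id_map.csv', 'wb') as out:
    for chunk in produce_csv():
        out.write(chunk)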
    Args:
        code (str): ISO 639-3 code plus "-" plus ISO 15924 code for the
            language/script to be loaded
        rev (bool): True to reverse the table (for reverse transliteration)
    """
    g2p = defaultdict(list)
    gr_by_line = defaultdict(list)
    code += '_rev' if rev else ''
    try:
        path = os.path.join('data', 'map', code + '.csv')
        path = pkg_resources.resource_filename(__name__, path)
    except IndexError:
        raise DatafileError('Add an appropriately-named mapping to the data/maps directory.')
    with open(path, 'rb') as f:
        reader = csv.reader(f, encoding='utf-8')
        orth, phon = next(reader)
        if orth != 'Orth' or phon != 'Phon':
            raise DatafileError('Header is ["{}", "{}"] instead of ["Orth", "Phon"].'.format(orth, phon))
        for (i, fields) in enumerate(reader):
            try:
                graph, phon = fields
            except ValueError:
                raise DatafileError('Map file is not well formed at line {}.'.format(i + 2))
            # NFD-normalize both sides so lookups are stable across
            # combining-mark orderings.
            graph = unicodedata.normalize('NFD', graph)
            phon = unicodedata.normalize('NFD', phon)
            g2p[graph].append(phon)
            gr_by_line[graph].append(i)
    if self._one_to_many_gr_by_line_map(g2p):
        graph, lines = self._one_to_many_gr_by_line_map(gr_by_line)
        lines = [l + 2 for l in lines]  # +2: header row plus 1-based numbering
        raise MappingError('One-to-many G2P mapping for "{}" on lines {}'
                           .format(graph, ', '.join(map(str, lines))).encode('utf-8'))
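
# The loader calls self._one_to_many_gr_by_line_map twice: once on g2p to
# detect a grapheme mapped to several values, once on gr_by_line to recover
# the offending line numbers. A plausible sketch of such a helper, assuming
# it returns the first key with more than one value (the real implementation
# may differ):
def _one_to_many_gr_by_line_map(self, mapping):
    for graph, values in mapping.items():
        if len(values) > 1:
            return graph, values
    return None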
def first_column_as_sheet_name(row):  # signature inferred from the call site below
    if len(row) == 0:
        return None, None
    elif len(row) == 1:
        return row[0], None
    else:
        s_or_c = row[0]
        content = row[1:]
        if s_or_c == "":
            s_or_c = None
        if "".join(content) == "":
            # content is a list of empty strings
            content = None
        return s_or_c, content
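
# The helper's contract, illustrated with made-up rows:
first_column_as_sheet_name([])                    # -> (None, None)
first_column_as_sheet_name(["survey"])            # -> ("survey", None)
first_column_as_sheet_name(["", "name", "type"])  # -> (None, ["name", "type"])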
reader = csv.reader(csv_data, encoding="utf-8")
sheet_name = None
current_headers = None
for row in reader:
    survey_or_choices, content = first_column_as_sheet_name(row)
    if survey_or_choices is not None:
        sheet_name = survey_or_choices
        if sheet_name not in _dict:
            _dict[str(sheet_name)] = []
        current_headers = None
    if content is not None:
        if current_headers is None:
            # The first content row under a sheet name holds the column headers.
            current_headers = content
            _dict["%s_header" % sheet_name] = _list_to_dict_list(current_headers)
        else:
            _d = OrderedDict()
            for key, val in zip(current_headers, content):
def __get_participants(self, source_telemetry, telemetry_file):
    try:
        tele_file = open(source_telemetry + telemetry_file, 'rb')
    except FileNotFoundError:
        # Build the telemetry file first, then retry the open.
        self.__process_telemetry(source_telemetry, telemetry_file)
        tele_file = open(source_telemetry + telemetry_file, 'rb')
    finally:
        # First pass: count the rows so the progress bar has a total.
        index = 0
        with open(source_telemetry + telemetry_file, 'rb') as csv_file:
            csvdata2 = csv.reader(csv_file, encoding='utf-8')
            for row in csvdata2:
                index += 1
        number_lines = index + 1
        # Second pass: analyze the telemetry rows themselves.
        csvdata = csv.reader(tele_file, encoding='utf-8')
        new_data = list()
        participants = 0
        self.participant_configurations = list()
        with tqdm(desc="Analyzing telemetry", total=number_lines) \
                as progress_bar:
            for row in csvdata:
                if len(row) == 687 and int(row[4]) != -1:
                    participants = int(row[4])
                if len(row) == 687:
                    pass
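
# The counting pass above can be collapsed to a single expression; a
# minimal equivalent sketch:
with open(source_telemetry + telemetry_file, 'rb') as csv_file:
    number_lines = sum(1 for _ in csv.reader(csv_file, encoding='utf-8')) + 1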
def main(wordlist1, wordlist2, dist_funcs):
    with open(wordlist1, 'rb') as file_a, open(wordlist2, 'rb') as file_b:
        reader_a = csv.reader(file_a, encoding='utf-8')
        reader_b = csv.reader(file_b, encoding='utf-8')
        print('Reading word lists...')
        # Each CSV row is (gloss, word); swap to (word, gloss) pairs.
        words = zip([(w, g) for (g, w) in reader_a],
                    [(w, g) for (g, w) in reader_b])
        words_a, words_b = zip(*[(a, b) for (a, b) in words if a and b])
        print('Constructing cost matrix...')
        matrix = construct_cost_matrix(words_a, words_b, dist_funcs)
        m = munkres.Munkres()
        print('Computing matrix using Hungarian Algorithm...')
        indices = m.compute(matrix)
        print(score(indices))
        print('Done.')
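
# How this might be invoked; the filenames and the toy distance function
# below are illustrative, not from the original project.
def char_overlap(a, b):
    # Toy distance: 1 minus the Jaccard overlap of the two character sets.
    union = set(a) | set(b)
    return 1.0 - len(set(a) & set(b)) / max(len(union), 1)

main('wordlist_a.csv', 'wordlist_b.csv', [char_overlap])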
def get_kanji(level, current_pos=1):
    """
    get_kanji returns the single record at line position current_pos
    level: 1 - 4 (N1 to N4)
    current_pos: up to the number of records
    """
    kanji = {}
    with open(KANJI_FILENAMES[level], 'rb') as fobj:
        reader = csv.reader(fobj, delimiter=',', encoding='utf-8')
        num_of_lines = 0
        for line in reader:
            num_of_lines += 1
            if num_of_lines == current_pos:
                kanji = dict(zip(KANJI_FIELDS, line))
                break

    # Encode every field to a UTF-8 byte string.
    for key, value in kanji.items():
        kanji[key] = value.encode("utf-8")
    return kanji
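
# A hedged usage sketch; KANJI_FILENAMES and KANJI_FIELDS are module-level
# constants assumed from the snippet above.
record = get_kanji(2, current_pos=5)  # fifth record of the N2 list
print(record)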