How to use the unicodecsv.reader function in unicodecsv

To help you get started, we've selected a few unicodecsv.reader examples based on popular ways the function is used in public projects.


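Before the project excerpts below, here is a minimal sketch of the pattern they all build on: open the file in binary mode, hand it to unicodecsv.reader together with the byte encoding, and iterate over rows of unicode strings. The file name and column handling are illustrative only.

import unicodecsv

# Minimal sketch, assuming a hypothetical UTF-8 file "people.csv":
# unicodecsv.reader takes a byte stream plus an encoding and yields
# each row as a list of unicode strings.
with open('people.csv', 'rb') as f:
    reader = unicodecsv.reader(f, encoding='utf-8')
    header = next(reader)      # first row as column names
    for row in reader:
        print(row)
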
github BarraQDA / nvivotools / editNode.py (View on Github)
verbosity, no_comments,
              comments, **dummy):

    try:

        # Read and skip comments at start of CSV file.
        csvcomments = ''
        if infile:
            csvFile = file(infile, 'r')

            while True:
                line = csvFile.readline()
                if line[:1] == '#':
                    csvcomments += line
                else:
                    csvfieldnames = next(unicodecsv.reader([line]))
                    break

        if not no_comments:
            logfilename = outfile.rsplit('.',1)[0] + '.log'
            if os.path.isfile(logfilename):
                incomments = open(logfilename, 'r').read()
            else:
                incomments = ''
            logfile = open(logfilename, 'w')
            logfile.write(comments)
            logfile.write(csvcomments)
            logfile.write(incomments)
            logfile.close()

        norm = NVivoNorm(outfile)
        norm.begin()
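
A pattern worth noting in the two nvivotools excerpts: unicodecsv.reader accepts any iterable of lines, not just a file object, so wrapping a single already-read header line in a list and calling next() on the reader is a compact way to split that line into its fields. A minimal sketch (the sample line is made up):

import unicodecsv

line = b'name,age,city\n'                      # a header line read elsewhere
fieldnames = next(unicodecsv.reader([line]))   # [u'name', u'age', u'city']
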
github BarraQDA / nvivotools / editTagging.py (View on Github)
if tagging:
            exec("\
def evaltagging(sourceRow, csvRow):\n\
    return " + tagging, globals())

        # Read and skip comments at start of CSV file.
        csvcomments = ''
        if infile:
            csvFile = file(infile, 'r')

            while True:
                line = csvFile.readline()
                if line[:1] == '#':
                    csvcomments += line
                else:
                    csvfieldnames = next(unicodecsv.reader([line]))
                    break

        if not no_comments:
            logfilename = outfile.rsplit('.',1)[0] + '.log'
            if os.path.isfile(logfilename):
                incomments = open(logfilename, 'r').read()
            else:
                incomments = ''
            logfile = open(logfilename, 'w')
            logfile.write(comments.encode('utf8'))
            logfile.write(csvcomments)
            logfile.write(incomments)
            logfile.close()

        norm = NVivoNorm(outfile)
        norm.begin()
github neulab / cmu-ner / utils / segnerfts_2.py (View on Github)
def load_gaz(gaz_fn):
    template = {'GPE': [], 'LOC': [], 'ORG': [], 'PER': []}
    gaz = {
        'amh': copy.copy(template),
        'eng': copy.copy(template),
        'deu': copy.copy(template),
        'orm': copy.copy(template),
        'som': copy.copy(template),
        'tir': copy.copy(template),
        }
    with open(gaz_fn, 'rb') as f:
        reader = csv.reader(f, encoding='utf-8')
        next(reader)
        for fields in reader:
            eng, lab, tir, tir_ipa, orm, orm_ipa, wik, id_, _ = fields
            if not lab:
                if len(eng.split()) == 1:
                    lab = 'GPE'
            if tir and lab:
                for v in get_variants(tir):
                    gaz['tir'][lab].append(v)
            if orm and lab:
                for v in get_variants(orm):
                    gaz['orm'][lab].append(v)
    return gaz
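
In load_gaz above (and in several of the later excerpts) the module is referenced as csv rather than unicodecsv; these projects presumably import it as a drop-in replacement for the standard library module, along the lines of:

import unicodecsv as csv   # same reader/writer API as the stdlib csv module,
                           # plus encoding / errors keyword arguments
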
github Sefaria / Sefaria-Project / scripts / bios / parse_bios_tsv.py (View on Github)
17: 'grandchild',
        18: 'childinlaw',
        19: 'student',
        20: 'member',
        21: 'correspondent',
        22: 'opposed',
        23: 'cousin',
    }

    tsv.seek(0)
    next(tsv)
    next(tsv)
    next(tsv)
    next(tsv)
    print "Adding relationships"
    for l in csv.reader(tsv, dialect="excel-tab"):
        key = l[0].encode('ascii', errors='ignore')
        p = Person().load({"key": key})
        for i, type in rowmap.items():
            if l[i]:
                for pkey in l[i].split(","):
                    pkey = pkey.strip().encode('ascii', errors='ignore')
                    print "{} - {}".format(key, pkey)
                    if Person().load({"key": pkey}):
                        pr = PersonRelationship({
                            "type": type,
                            "from_key": key,
                            "to_key": pkey
                        })
                        pr.save()
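
The Sefaria excerpt reads a tab-separated file by passing dialect="excel-tab", which unicodecsv supports just like the standard library. A minimal sketch (file name and columns are hypothetical):

import unicodecsv as csv

with open('bios.tsv', 'rb') as tsv:
    for row in csv.reader(tsv, dialect='excel-tab', encoding='utf-8'):
        print(row)   # each row is a list of unicode fields split on tabs
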
github SFDO-Tooling / CumulusCI / cumulusci / tasks / bulkdata.py (View on Github)
def produce_csv():
            """Iterate over job results and prepare rows for id table"""
            reader = unicodecsv.reader(result_file)
            next(reader)  # skip header
            i = 0
            for row, local_id in zip(reader, local_ids):
                if row[1] == "true":  # Success
                    sf_id = row[0]
                    yield "{},{}\n".format(local_id, sf_id).encode("utf-8")
                else:
                    self.logger.warning("      Error on row {}: {}".format(i, row[3]))
                i += 1
github dmort27 / epitran / epitran / simple.py (View on Github)
Args:
            code (str): ISO 639-3 code plus "-" plus ISO 15924 code for the
                        language/script to be loaded
            rev (boolean): True for reversing the table (for reverse transliterating)
        """
        g2p = defaultdict(list)
        gr_by_line = defaultdict(list)
        code += '_rev' if rev else ''
        try:
            path = os.path.join('data', 'map', code + '.csv')
            path = pkg_resources.resource_filename(__name__, path)
        except IndexError:
            raise DatafileError('Add an appropriately-named mapping to the data/maps directory.')
        with open(path, 'rb') as f:
            reader = csv.reader(f, encoding='utf-8')
            orth, phon = next(reader)
            if orth != 'Orth' or phon != 'Phon':
                raise DatafileError('Header is ["{}", "{}"] instead of ["Orth", "Phon"].'.format(orth, phon))
            for (i, fields) in enumerate(reader):
                try:
                    graph, phon = fields
                except ValueError:
                    raise DatafileError('Map file is not well formed at line {}.'.format(i + 2))
                graph = unicodedata.normalize('NFD', graph)
                phon = unicodedata.normalize('NFD', phon)
                g2p[graph].append(phon)
                gr_by_line[graph].append(i)
        if self._one_to_many_gr_by_line_map(g2p):
            graph, lines = self._one_to_many_gr_by_line_map(gr_by_line)
            lines = [l + 2 for l in lines]
            raise MappingError('One-to-many G2P mapping for "{}" on lines {}'.format(graph, ', '.join(map(str, lines))).encode('utf-8'))
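
The epitran loader shows two details worth copying: the header row can be unpacked directly from next(reader), and wrapping the per-row unpacking in try/except ValueError lets you report the offending line number for malformed rows. A stripped-down sketch of the same idea (file name and variable names are hypothetical):

import unicodecsv as csv

with open('map.csv', 'rb') as f:
    reader = csv.reader(f, encoding='utf-8')
    orth, phon = next(reader)        # unpack the two-column header row
    for i, fields in enumerate(reader):
        try:
            graph, pron = fields     # expect exactly two fields per row
        except ValueError:
            raise ValueError('Map file is not well formed at line {}'.format(i + 2))
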
github XLSForm / pyxform / pyxform / xls2json_backends.py (View on Github)
if len(row) == 0:
            return None, None
        elif len(row) == 1:
            return row[0], None
        else:
            s_or_c = row[0]
            content = row[1:]
            if s_or_c == "":
                s_or_c = None
            # concatenate all the strings in content
            if reduce(lambda x, y: x + y, content) == "":
                # content is a list of empty strings
                content = None
            return s_or_c, content

    reader = csv.reader(csv_data, encoding="utf-8")
    sheet_name = None
    current_headers = None
    for row in reader:
        survey_or_choices, content = first_column_as_sheet_name(row)
        if survey_or_choices is not None:
            sheet_name = survey_or_choices
            if sheet_name not in _dict:
                _dict[unicode(sheet_name)] = []
            current_headers = None
        if content is not None:
            if current_headers is None:
                current_headers = content
                _dict["%s_header" % sheet_name] = _list_to_dict_list(current_headers)
            else:
                _d = OrderedDict()
                for key, val in zip(current_headers, content):
github SenorPez / project-cars-replay-enhancer / replayenhancer / Configuration.py (View on Github)
def __get_participants(self, source_telemetry, telemetry_file):
        try:
            tele_file = open(source_telemetry+telemetry_file, 'rb')
        except FileNotFoundError:
            self.__process_telemetry(source_telemetry, telemetry_file)
            tele_file = open(source_telemetry+telemetry_file, 'rb')
        finally:
            index = 0
            with open(
                source_telemetry+telemetry_file,
                'rb') as csv_file:
                csvdata2 = csv.reader(csv_file, encoding='utf-8')
                for row in csvdata2:
                    index += 1
            number_lines = index+1
            csvdata = csv.reader(tele_file, encoding='utf-8')

        new_data = list()
        participants = 0
        self.participant_configurations = list()

        with tqdm(desc="Analyzing telemetry", total=number_lines) \
                as progress_bar:
            for row in csvdata:
                if len(row) == 687 and int(row[4]) != -1:
                    participants = int(row[4])
                if len(row) == 687:
                    pass
github dmort27 / panphon / panphon / bin / align_wordlists.py View on Github external
def main(wordlist1, wordlist2, dist_funcs):
    with open(wordlist1, 'rb') as file_a, open(wordlist2, 'rb') as file_b:
        reader_a = csv.reader(file_a, encoding='utf-8')
        reader_b = csv.reader(file_b, encoding='utf-8')
        print('Reading word lists...')
        words = zip([(w, g) for (g, w) in reader_a],
                    [(w, g) for (g, w) in reader_b])
        words_a, words_b = zip(*[(a, b) for (a, b) in words if a and b])
        print('Constructing cost matrix...')
        matrix = construct_cost_matrix(words_a, words_b, dist_funcs)
        m = munkres.Munkres()
        print('Computing matrix using Hungarian Algorithm...')
        indices = m.compute(matrix)
        print(score(indices))
        print('Done.')
github freedomofkeima / messenger-maid-chan / maidchan / japanese.py (View on Github)
def get_kanji(level, current_pos=1):
    """
    get_kanji returns a single record of the current_pos line position

    level: 1 - 4 (N1 to N4)
    current_pos: up to number of records
    """
    kanji = {}
    with open(KANJI_FILENAMES[level], 'rb') as fobj:
        reader = csv.reader(fobj, delimiter=',', encoding='utf-8')
        num_of_lines = 0
        for line in reader:
            num_of_lines += 1
            if num_of_lines == current_pos:
                kanji = dict(zip(KANJI_FIELDS, line))
                break
    # Convert to UTF-8
    for key, value in kanji.iteritems():
        kanji[key] = value.encode("utf-8")
    return kanji
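
Finally, the maid-chan excerpt builds a dict by zipping a field-name list with each row; unicodecsv also ships a DictReader that does this for you, mirroring the standard library. A minimal sketch (the file name and field names below are hypothetical):

import unicodecsv

with open('kanji_n4.csv', 'rb') as fobj:
    reader = unicodecsv.DictReader(fobj,
                                   fieldnames=['kanji', 'reading', 'meaning'],
                                   delimiter=',', encoding='utf-8')
    for record in reader:
        print(record['kanji'])   # each record is a dict keyed by field name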