How to use the indra.util.read_unicode_csv function in indra

To help you get started, we’ve selected a few indra examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github sorgerlab / indra / indra / databases / uniprot_client.py View on Github external
def _build_uniprot_entries():
    up_entries_file = os.path.dirname(os.path.abspath(__file__)) + \
        '/../resources/uniprot_entries.tsv'
    uniprot_gene_name = {}
    uniprot_mnemonic = {}
    uniprot_mnemonic_reverse = {}
    uniprot_mgi = {}
    uniprot_rgd = {}
    uniprot_mgi_reverse = {}
    uniprot_rgd_reverse = {}
    try:
        csv_rows = read_unicode_csv(up_entries_file, delimiter='\t')
        # Skip the header row
        next(csv_rows)
        for row in csv_rows:
            up_id, gene_name, up_mnemonic, rgd, mgi = row
            uniprot_gene_name[up_id] = gene_name
            uniprot_mnemonic[up_id] = up_mnemonic
            uniprot_mnemonic_reverse[up_mnemonic] = up_id
            if mgi:
                mgi_ids = mgi.split(';')
                if mgi_ids:
                    uniprot_mgi[up_id] = mgi_ids[0]
                    uniprot_mgi_reverse[mgi_ids[0]] = up_id
            if rgd:
                rgd_ids = rgd.split(';')
                if rgd_ids:
                    uniprot_rgd[up_id] = rgd_ids[0]
github sorgerlab / indra / indra / databases / mesh_client.py View on Github external
def _load_db_mappings(path):
    """Build one-to-one lookup tables between MeSH IDs and other databases.

    Parameters
    ----------
    path : str
        Path to a tab-separated file with six columns per row. Only the
        second (MeSH ID), fourth (database namespace) and fifth (database
        ID) columns are used.

    Returns
    -------
    mesh_to_db : dict
        Maps a MeSH ID to its ``(db_ns, db_id)`` tuple.
    db_to_mesh : dict
        Maps a ``(db_ns, db_id)`` tuple to its MeSH ID.

    Notes
    -----
    A key that shows up in more than one row is removed from the
    corresponding dict and never re-added, so both results only contain
    unambiguous entries.
    """
    mesh_to_db, db_to_mesh = {}, {}
    ambiguous_mesh, ambiguous_db = set(), set()
    for row in read_unicode_csv(path, delimiter='\t'):
        _, mesh_id, _, db_ns, db_id, _ = row
        db_key = (db_ns, db_id)

        # Forward direction: the second sighting of a MeSH ID evicts the
        # entry, and the ambiguity set keeps later rows from restoring it
        if mesh_id in mesh_to_db:
            ambiguous_mesh.add(mesh_id)
            del mesh_to_db[mesh_id]
        elif mesh_id not in ambiguous_mesh:
            mesh_to_db[mesh_id] = db_key

        # Reverse direction: same bookkeeping keyed on the (db_ns, db_id)
        # pair
        if db_key in db_to_mesh:
            ambiguous_db.add(db_key)
            del db_to_mesh[db_key]
        elif db_key not in ambiguous_db:
            db_to_mesh[db_key] = mesh_id
    return mesh_to_db, db_to_mesh
github sorgerlab / indra / indra / databases / uniprot_client.py View on Github external
def _build_uniprot_subcell_loc():
    """Load the UniProt subcellular location table from the resources folder.

    Reads ``uniprot_subcell_loc.tsv`` as a tab-separated file, skips its
    header row and maps the first column of each remaining row (the location
    ID) to the fourth column (the location alias).

    Returns
    -------
    subcell_loc : dict
        Mapping from location ID to location alias. Empty if the file cannot
        be read or contains no rows.
    """
    fname = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         os.pardir, 'resources', 'uniprot_subcell_loc.tsv')
    try:
        csv_rows = read_unicode_csv(fname, delimiter='\t')
        # Skip the header row. The default keeps an empty file from raising
        # StopIteration, so it is handled like a missing file.
        next(csv_rows, None)
        return {row[0]: row[3] for row in csv_rows}
    except IOError:
        # Best effort: an unreadable resource file yields an empty table
        return {}
github sorgerlab / indra / indra / databases / hgnc_client.py View on Github external
def _read_tfs():
    """Return the gene names listed in the transcription factor resource.

    Reads ``transcription_factors.csv`` from the resources folder, ignores
    the first (header) row and collects the second column of every other
    row.

    Returns
    -------
    gene_names : list
        The second-column value of each data row, in file order.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    tf_path = os.path.join(module_dir, os.pardir, 'resources',
                           'transcription_factors.csv')
    gene_names = []
    for row_idx, row in enumerate(read_unicode_csv(tf_path)):
        # Row 0 is the header and carries no gene name
        if row_idx == 0:
            continue
        gene_names.append(row[1])
    return gene_names
github sorgerlab / indra / indra / resources / update_resources.py View on Github external
if len(pc_ids) > 1:
            ck = chebi_client.get_inchi_key(chebi_id)
            for pc_id in pc_ids:
                pk = pubchem_client.get_inchi_key(pc_id)
                if ck == pk:
                    ik_matches.add((chebi_id, pc_id))
    # Looking for InChIKey matches for duplicates in the PubChem -> ChEBI
    # direction
    for pc_id, chebi_ids in pubchem_chebi.items():
        if len(chebi_ids) > 1:
            pk = pubchem_client.get_inchi_key(pc_id)
            for chebi_id in chebi_ids:
                ck = chebi_client.get_inchi_key(chebi_id)
                if ck == pk:
                    ik_matches.add((chebi_id, pc_id))
    rows = read_unicode_csv(fname, '\t')
    header = next(rows)
    header.append('IK_MATCH')
    new_rows = [header]
    for chebi_id, pc_id in rows:
        if (chebi_id, pc_id) in ik_matches:
            new_rows.append([chebi_id, pc_id, 'Y'])
        else:
            new_rows.append([chebi_id, pc_id, ''])
    write_unicode_csv(fname, new_rows, '\t')

    # Save ChEMBL mapping
    fname = os.path.join(path, 'chebi_to_chembl.tsv')
    logger.info('Saving into %s' % fname)
    df_chembl = df[df['REFERENCE_DB_NAME']=='ChEMBL']
    df_chembl.sort_values(['COMPOUND_ID', 'REFERENCE_ID'], ascending=True,
                          inplace=True)
github sorgerlab / indra / indra / sources / reach / processor.py View on Github external
def _read_famplex_map():
    """Load the FamPlex mapping table from the resources folder.

    Every row of the tab-separated ``famplex_map.tsv`` file contributes one
    entry; no header row is skipped.

    Returns
    -------
    famplex_map : dict
        Maps a ``(source_ns, source_id)`` tuple, taken from the first two
        columns, to the FamPlex ID in the third column. If a key occurs more
        than once, the last row wins.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    tsv_path = os.path.join(module_dir, '../../resources/famplex_map.tsv')
    return {(row[0], row[1]): row[2]
            for row in read_unicode_csv(tsv_path, delimiter='\t')}
github sorgerlab / indra / indra / preassembler / grounding_mapper / mapper.py View on Github external
Path to csv file containing grounding map information. Rows of the file
        should be of the form <text>,<db_name_1>,<db_id_1>,...
        <db_name_n>,<db_id_n>
    lineterminator : Optional[str]
        Line terminator used in input csv file. Default: \r\n
    hgnc_symbols : Optional[bool]
        Set to True if the grounding map file contains HGNC symbols rather than
        IDs. In this case, the entries are replaced by IDs. Default: True

    Returns
    -------
    g_map : dict
        The grounding map constructed from the given files.
    """
    gmap = {}
    map_rows = read_unicode_csv(grounding_map_path, delimiter=',',
                                quotechar='"',
                                quoting=csv.QUOTE_MINIMAL,
                                lineterminator=lineterminator)
    for row in map_rows:
        txt = row[0]
        keys = [entry for entry in row[1::2] if entry]
        values = [entry for entry in row[2::2] if entry]
        if not keys or not values:
            logger.warning('Missing grounding entries for %s, skipping.' % txt)
            continue
        if len(keys) != len(values):
            logger.warning('Mismatched keys and values in row %s, skipping.' %
                           str(row))
            continue
        gmap[txt] = dict(zip(keys, values))
    if hgnc_symbols:
github sorgerlab / indra / indra / databases / hgnc_client.py View on Github external
def _read_hgnc_maps():
    hgnc_file = os.path.dirname(os.path.abspath(__file__)) + \
                '/../resources/hgnc_entries.tsv'
    csv_rows = read_unicode_csv(hgnc_file, delimiter='\t', encoding='utf-8')
    hgnc_names = {}
    hgnc_ids = {}
    hgnc_withdrawn = []
    uniprot_ids = {}
    entrez_ids = {}
    entrez_ids_reverse = {}
    mouse_map = {}
    rat_map = {}
    prev_sym_map = {}
    ensembl_ids = {}
    ensembl_ids_reverse = {}
    hgnc_withdrawn_new_ids = {}
    # Skip the header
    next(csv_rows)
    for row in csv_rows:
        hgnc_id = row[0][5:]
github sorgerlab / indra / indra / databases / chebi_client.py View on Github external
def _read_resource_csv(fname):
    """Open a tab-separated file from the resources folder.

    Parameters
    ----------
    fname : str
        Name of the file inside the ``resources`` folder that sits one level
        above this module's directory.

    Returns
    -------
    csv_reader
        Whatever ``read_unicode_csv`` returns for that file when called with
        a tab delimiter.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    resource_path = os.path.join(module_dir, os.pardir, 'resources', fname)
    return read_unicode_csv(resource_path, delimiter='\t')