How to use the indra.databases.hgnc_client.get_uniprot_id function in indra

To help you get started, we’ve selected a few indra examples, based on popular ways this function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github sorgerlab / indra / models / phase3_eval / read_phosphosite.py View on Github external
def read_phosphosite(fname=phosphosite_file):
    df = pandas.read_csv(fname, index_col=None, sep='\t', encoding='utf8')
    statements = []
    for _, row in df.iterrows():
        sub_upid = row['SUB_ID']
        if not pandas.isnull(sub_upid):
            if sub_upid.find('-') != -1:
                sub_upid = sub_upid.split('-')[0]
            sub_hgnc_symbol = uniprot_client.get_gene_name(sub_upid)
            sub_hgnc = hgnc_client.get_hgnc_id(sub_hgnc_symbol)
        else:
            sub_hgnc_symbol = row['SUB_GENE']
            sub_hgnc_id = hgnc_client.get_hgnc_id(sub_hgnc_symbol)
            sub_upid = hgnc_client.get_uniprot_id(sub_hgnc_id)
            if sub_upid is None:
                continue
        sub = Agent(sub_hgnc_symbol,
                    db_refs={'UP': sub_upid,'HGNC': sub_hgnc})
        residue = row['Actual_site'][0]
        if len(row['Actual_site']) > 1:
            position = row['Actual_site'][1:]
        else:
            position = None

        ps = row['phosphosite']
        kin_upid = row['KIN_ID']
        if not pandas.isnull(kin_upid):
            if kin_upid.find('-') != -1:
                kin_upid = kin_upid.split('-')[0]
            if not uniprot_client.is_human(kin_upid):
github sorgerlab / indra / indra / sources / medscan / processor.py View on Github external
# TODO: support more types of URNs
    if urn_type == 'agi-cas':
        # Identifier is CAS, convert to CHEBI
        chebi_id = get_chebi_id_from_cas(urn_id)
        if chebi_id:
            db_refs['CHEBI'] = chebi_id
            db_name = get_chebi_name_from_id(chebi_id)
    elif urn_type == 'agi-llid':
        # This is an Entrez ID, convert to HGNC
        hgnc_id = get_hgnc_from_entrez(urn_id)
        if hgnc_id is not None:
            db_refs['HGNC'] = hgnc_id

            # Convert the HGNC ID to a Uniprot ID
            uniprot_id = get_uniprot_id(hgnc_id)
            if uniprot_id is not None:
                db_refs['UP'] = uniprot_id

            # Try to lookup HGNC name; if it's available, set it to the
            # agent name
            db_name = get_hgnc_name(hgnc_id)
    elif urn_type in ['agi-meshdis', 'agi-ncimorgan', 'agi-ncimtissue',
                      'agi-ncimcelltype']:
        if urn_id.startswith('C') and urn_id[1:].isdigit():
            # Identifier is probably UMLS
            db_refs['UMLS'] = urn_id
        else:
            # Identifier is MESH
            urn_mesh_name = unquote(urn_id)
            mesh_id, mesh_name = mesh_client.get_mesh_id_name(urn_mesh_name)
            if mesh_id:
github sorgerlab / indra / models / sitemapper / align_isoforms.py View on Github external
def get_genes_to_refseq_ids(problems):
    """Group the RefSeq IDs found in the peptide file by gene symbol.

    Each row's first column is split as ``<gene symbol>.<refseq id>:<site>``.
    The first time a gene symbol is seen it is checked against HGNC; it is
    only admitted to the result if it has an HGNC ID and a single UniProt
    ID (a value containing ``', '`` is rejected).

    Parameters
    ----------
    problems :
        Collection exposing ``add``; receives ``(refseq_id, reason)`` tuples
        for rows whose gene could not be validated.

    Returns
    -------
    dict
        Maps each accepted gene symbol to the set of its RefSeq IDs.
    """
    refseqs_by_gene = {}
    # skiprows=1 presumably drops a header line -- confirm against the file.
    for record in read_unicode_csv(peptide_file, delimiter='\t', skiprows=1):
        symbol, remainder = record[0].split('.', maxsplit=1)
        refseq, _site = remainder.split(':')

        # Gene already validated on an earlier row: just accumulate.
        if symbol in refseqs_by_gene:
            refseqs_by_gene[symbol].add(refseq)
            continue

        gene_id = hgnc_client.get_hgnc_id(symbol)
        if not gene_id:
            problems.add((refseq, 'invalid gene symbol'))
            continue

        primary_up = hgnc_client.get_uniprot_id(gene_id)
        if not primary_up or ', ' in primary_up:
            problems.add((refseq, 'could not get Uniprot ID from HGNC'))
            continue

        refseqs_by_gene[symbol] = {refseq}
    return refseqs_by_gene
github sorgerlab / indra / models / fallahi_eval / process_data.py View on Github external
def agent_from_gene_name(name):
    """Create an Agent called *name* and attach HGNC/UniProt references.

    The HGNC ID is looked up from the gene name and the UniProt ID from
    that HGNC ID; both lookup results are stored in ``db_refs`` exactly as
    returned, without checking whether either lookup succeeded.
    """
    grounded = Agent(name)
    gene_id = hgnc_client.get_hgnc_id(name)
    grounded.db_refs = {
        'HGNC': gene_id,
        'UP': hgnc_client.get_uniprot_id(gene_id),
    }
    return grounded
github sorgerlab / indra / models / phase3_eval / make_stmts_for_checking.py View on Github external
def get_target_agent(target):
    """Return an Agent for *target* carrying its HGNC and UniProt refs.

    *target* is used both as the agent name and as the symbol passed to the
    HGNC lookup; the lookup results are stored as returned.
    """
    hgnc_ref = hgnc_client.get_hgnc_id(target)
    refs = {
        'HGNC': hgnc_ref,
        'UP': hgnc_client.get_uniprot_id(hgnc_ref),
    }
    return Agent(target, db_refs=refs)
github sorgerlab / indra / indra / databases / biogrid_client.py View on Github external
def get_db_refs(egid):
    """Resolve an Entrez gene ID to an HGNC name and a db_refs dict.

    Looks up, in order, the HGNC ID for *egid*, the HGNC name for that ID,
    and the UniProt ID for that ID. If any step yields a falsy result, the
    failure is logged at INFO level and ``(None, {})`` is returned.

    Returns
    -------
    tuple
        ``(hgnc_name, {'HGNC': hgnc_id, 'UP': up_id})`` on success,
        ``(None, {})`` otherwise.
    """
    gene_id = hgnc_client.get_hgnc_from_entrez(egid)
    if not gene_id:
        logger.info("No HGNC ID for Entrez ID: %s" % egid)
        return None, {}

    symbol = hgnc_client.get_hgnc_name(gene_id)
    if not symbol:
        logger.info("No HGNC name for HGNC ID: %s" % gene_id)
        return None, {}

    uniprot = hgnc_client.get_uniprot_id(gene_id)
    if not uniprot:
        details = (egid, gene_id, symbol)
        logger.info("No Uniprot ID for EGID / HGNC ID / Symbol "
                    "%s / %s / %s" % details)
        return None, {}

    refs = {'HGNC': gene_id, 'UP': uniprot}
    return symbol, refs
github sorgerlab / indra / indra / sources / bel / rdf_processor.py View on Github external
def _get_agent(concept, entity):
        name = term_from_uri(concept)
        namespace = namespace_from_uri(entity)
        db_refs = {}
        if namespace == 'HGNC':
            agent_name = name
            hgnc_id = hgnc_client.get_hgnc_id(name)
            if hgnc_id is not None:
                db_refs['HGNC'] = str(hgnc_id)
                up_id = hgnc_client.get_uniprot_id(hgnc_id)
                if up_id:
                    db_refs['UP'] = up_id
                else:
                    logger.warning('HGNC entity %s with HGNC ID %s has no '
                                   'corresponding Uniprot ID.' %
                                   (name, hgnc_id))
            else:
                logger.warning("Couldn't get HGNC ID for HGNC symbol %s" %
                               name)
        elif namespace in ('MGI', 'RGD'):
            agent_name = name
            db_refs[namespace] = name
        elif namespace in ('PFH', 'SFAM'):
            indra_name = bel_to_indra.get(name)
            db_refs[namespace] = name
            if indra_name is None: