How to use the indra.databases.uniprot_client.get_gene_name function in indra

To help you get started, we’ve selected a few indra examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github sorgerlab / indra / models / phase3_eval / process_data.py View on Github external
def get_drug_targets(fname=None):
    """Return a dict mapping drug abbreviations to their target gene names.

    Parameters
    ----------
    fname : str, optional
        Path of the header-less CSV grounding table to read. When empty or
        not given, the module-level ``drug_grounding_file`` is used.

    Returns
    -------
    dict
        One entry per row, keyed by the value in column 1. Each value is
        the list obtained by calling ``uniprot_client.get_gene_name`` on
        every comma-separated ID found in column 6 of that row.
    """
    source = fname if fname else drug_grounding_file
    table = pandas.read_csv(source, index_col=None, header=None,
                            encoding='utf-8')
    # Column 1 holds the abbreviation, column 6 the comma-separated IDs.
    return {
        abbrev: [uniprot_client.get_gene_name(up_id)
                 for up_id in id_field.split(',')]
        for abbrev, id_field in zip(table[1], table[6])
    }
github sorgerlab / indra / indra / assemblers / index_card / assembler.py View on Github external
participant['entity_type'] = 'chemical'
    elif pfam_def_ids:
        participant['entity_type'] = 'protein_family'
        participant['entities'] = []
        pfam_def_list = []
        for p in pfam_def_ids.split('|'):
            dbname, dbid = p.split(':')
            pfam_def_list.append({dbname: dbid})
        for pdi in pfam_def_list:
            # TODO: handle non-uniprot protein IDs here
            uniprot_id = pdi.get('UP')
            if uniprot_id:
                entity_dict = {}
                uniprot_mnemonic = \
                    str(uniprot_client.get_mnemonic(uniprot_id))
                gene_name = uniprot_client.get_gene_name(uniprot_id)
                if gene_name is None:
                    gene_name = ""
                entity_dict['entity_text'] = [gene_name]
                entity_dict['identifier'] = 'UNIPROT:%s' % uniprot_mnemonic
                entity_dict['entity_type'] = 'protein'
                participant['entities'].append(entity_dict)
    else:
        participant['identifier'] = ''
        participant['entity_type'] = 'protein'

    features = []
    not_features = []
    # Binding features
    for bc in agent.bound_conditions:
        feature = {
            'feature_type': 'binding_feature',
github sorgerlab / indra / models / phase3_eval / read_phosphosite.py View on Github external
def read_phosphosite(fname=phosphosite_file):
    """Process a tab-separated phosphosite table row by row.

    For every row a substrate ``Agent`` is built from the ``SUB_ID`` column
    when it is set, or from the ``SUB_GENE`` column otherwise, and the
    residue/position are parsed out of the ``Actual_site`` column.

    Parameters
    ----------
    fname : str
        Path of the tab-separated file to read; defaults to the
        module-level ``phosphosite_file``.
    """
    df = pandas.read_csv(fname, index_col=None, sep='\t', encoding='utf8')
    statements = []
    for _, row in df.iterrows():
        sub_upid = row['SUB_ID']
        if not pandas.isnull(sub_upid):
            # Keep only the part before the first '-' (presumably an
            # isoform suffix -- confirm against the data file).
            if sub_upid.find('-') != -1:
                sub_upid = sub_upid.split('-')[0]
            sub_hgnc_symbol = uniprot_client.get_gene_name(sub_upid)
            sub_hgnc = hgnc_client.get_hgnc_id(sub_hgnc_symbol)
        else:
            # No substrate ID in this row: go from the gene symbol to an
            # HGNC ID and from there to a UniProt ID.
            sub_hgnc_symbol = row['SUB_GENE']
            sub_hgnc_id = hgnc_client.get_hgnc_id(sub_hgnc_symbol)
            sub_upid = hgnc_client.get_uniprot_id(sub_hgnc_id)
            if sub_upid is None:
                continue
            # Bind the name read by the Agent construction below; without
            # this the branch left ``sub_hgnc`` undefined (NameError) or
            # stale from a previous row.
            sub_hgnc = sub_hgnc_id
        sub = Agent(sub_hgnc_symbol,
                    db_refs={'UP': sub_upid, 'HGNC': sub_hgnc})
        # First character of 'Actual_site' is the residue, the rest (if
        # any) is the position.
        residue = row['Actual_site'][0]
        if len(row['Actual_site']) > 1:
            position = row['Actual_site'][1:]
        else:
            position = None

        ps = row['phosphosite']
github sorgerlab / indra / indra / sources / sparser / processor.py View on Github external
gene_name = hgnc_client.get_hgnc_name(hgnc_id)
        if gene_name:
            agent.name = gene_name
        if not up_id:
            up_id = hgnc_client.get_uniprot_id(hgnc_id)
            if up_id:
                if ', ' in up_id:
                    up_id = up_id.split(', ')[0]
                agent.db_refs['UP'] = up_id
    elif up_id:
        hgnc_id = uniprot_client.get_hgnc_id(up_id)
        if hgnc_id:
            agent.db_refs['HGNC'] = hgnc_id
            agent.name = hgnc_client.get_hgnc_name(hgnc_id)
        else:
            gene_name = uniprot_client.get_gene_name(up_id, web_fallback=False)
            if gene_name:
                agent.name = gene_name
            # If it doesn't have a gene name, it's better to just
            # use the raw string name otherwise Sparser sets
            # has Uniprot IDs or mnemonics as the name
            else:
                name = agent.db_refs.get('TEXT', agent.name)
                agent.name = name
github sorgerlab / indra / indra / sources / virhostnet / processor.py View on Github external
up_id, feat_id = db_id.split('-')
            # Assume it's a feature ID
            assert feat_id.startswith('PRO'), feat_id
            db_refs = {'UP': up_id, 'UPPRO': feat_id}
        else:
            db_refs = {'UP': db_id}
    elif db_ns == 'refseq':
        db_refs = {'REFSEQ_PROT': db_id}
    else:
        db_refs = {'GENBANK': db_id}
    agent = Agent(db_id, db_refs=db_refs)
    standardized = standardize_agent_name(agent)
    if up_web_fallback:
        # Handle special case of unreviewed UP entries
        if not standardized and 'UP' in db_refs:
            name = uniprot_client.get_gene_name(db_refs['UP'],
                                                web_fallback=True)
            if name:
                agent.name = name
    return agent
github sorgerlab / indra / models / phase3_eval / process_sparser.py View on Github external
for ev in stmt.evidence:
            # Strip the leading 'PMID' prefix. The previous slice ([:-4])
            # cut the last four characters instead, keeping the prefix and
            # truncating the identifier.
            if ev.pmid and ev.pmid.startswith('PMID'):
                ev.pmid = ev.pmid[4:]
        # Skip if no subject
        if isinstance(stmt, RegulateActivity):
            if stmt.subj is None:
                continue
        # Skip if no locations
        if isinstance(stmt, Translocation):
            if not (stmt.from_location or stmt.to_location):
                continue
        for agent in stmt.agent_list():
            if agent is not None:
                upid = agent.db_refs.get('UP')
                if upid:
                    gene_name = uniprot_client.get_gene_name(upid)
                    if gene_name:
                        agent.name = gene_name
                        if uniprot_client.is_human(upid):
                            hgnc_id = hgnc_client.get_hgnc_id(gene_name)
                            if hgnc_id:
                                agent.db_refs['HGNC'] = hgnc_id

        new_stmts.append(stmt)
    return new_stmts
github sorgerlab / indra / indra / sources / signor / processor.py View on Github external
# condition
            agent = agents[0]
            agent.bound_conditions = \
                    [BoundCondition(a, True) for a in agents[1:]]
            return agent
        else:
            gnd_type = _type_db_map[(ent_type, database)]
            if gnd_type == 'UP':
                up_id = id
                db_refs = {'UP': up_id}
                hgnc_id = uniprot_client.get_hgnc_id(up_id)
                if hgnc_id:
                    db_refs['HGNC'] = hgnc_id
                    name = hgnc_client.get_hgnc_name(hgnc_id)
                else:
                    name = uniprot_client.get_gene_name(up_id)
            # Map SIGNOR protein families to FamPlex families
            elif ent_type == 'proteinfamily':
                db_refs = {database: id} # Keep the SIGNOR family ID in db_refs
                key = (database, id)
                # Use SIGNOR name unless we have a mapping in FamPlex
                name = ent_name
                famplex_id = famplex_map.get(key)
                if famplex_id is None:
                    logger.info('Could not find %s in FamPlex map' %
                                str(key))
                else:
                    db_refs['FPLX'] = famplex_id
                    name = famplex_id
            # Other possible groundings are PUBCHEM, SIGNOR, etc.
            elif gnd_type is not None:
                if database not in ('PUBCHEM', 'SIGNOR', 'ChEBI', 'miRBase',
github sorgerlab / indra / indra / sources / signor / processor.py View on Github external
def _get_complex_agents(self, complex_id):
        """Returns a list of agents corresponding to each of the constituents
        in a SIGNOR complex."""
        agents = []
        components = self._recursively_lookup_complex(complex_id)

        for c in components:
            db_refs = {}
            name = uniprot_client.get_gene_name(c)
            if name is None:
                db_refs['SIGNOR'] = c
            else:
                db_refs['UP'] = c
                hgnc_id = uniprot_client.get_hgnc_id(c)
                if hgnc_id:
                    name = hgnc_client.get_hgnc_name(hgnc_id)
                    db_refs['HGNC'] = hgnc_id

            famplex_key = ('SIGNOR', c)
            if famplex_key in famplex_map:
                db_refs['FPLX'] = famplex_map[famplex_key]
                if not name:
                    name = db_refs['FPLX']  # Set agent name to Famplex name if
                                            # the Uniprot name is not available
            elif not name:
github sorgerlab / indra / indra / preassembler / grounding_mapper / analysis.py View on Github external
# If 'UP' (Uniprot) not one of the grounding entries for this text,
        # then we skip it.
        if 'UP' not in [entry[0] for entry in grounding_list]:
            continue
        # Otherwise, collect all the Uniprot IDs for this protein.
        uniprot_ids = [entry[1] for entry in grounding_list
                       if entry[0] == 'UP']
        # For each Uniprot ID, look up the species
        for uniprot_id in uniprot_ids:
            # If it's not a human protein, skip it
            mnemonic = uniprot_client.get_mnemonic(uniprot_id)
            if mnemonic is None or not mnemonic.endswith('_HUMAN'):
                continue
            # Otherwise, look up the gene name in HGNC and match against the
            # agent text
            gene_name = uniprot_client.get_gene_name(uniprot_id)
            if gene_name is None:
                unmatched += 1
                continue
            if agent_text.upper() == gene_name.upper():
                matched += 1
                protein_map[agent_text] = {'TEXT': agent_text,
                                           'UP': uniprot_id}
            else:
                unmatched += 1
    logger.info('Exact matches for %d proteins' % matched)
    logger.info('No match (or no gene name) for %d proteins' % unmatched)
    return protein_map