How to use the indra.databases.hgnc_client module in indra

To help you get started, we’ve selected a few indra examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github sorgerlab / indra / indra / sources / sparser / processor.py View on Github external
# db_refs['UP'] is an empty string, and there is no other grounding.
    # In this case, we remove the empty UP grounding and reset the name to the
    # agent text.
    if not be_id and not hgnc_id and up_id == '':
        agent.name = agent.db_refs.get('TEXT', agent.name)
        agent.db_refs.pop('UP')
    # FPLX takes precedence if we have it
    elif be_id:
        agent.db_refs['FPLX'] = be_id
        agent.name = be_id
    elif hgnc_id:
        gene_name = hgnc_client.get_hgnc_name(hgnc_id)
        if gene_name:
            agent.name = gene_name
        if not up_id:
            up_id = hgnc_client.get_uniprot_id(hgnc_id)
            if up_id:
                if ', ' in up_id:
                    up_id = up_id.split(', ')[0]
                agent.db_refs['UP'] = up_id
    elif up_id:
        hgnc_id = uniprot_client.get_hgnc_id(up_id)
        if hgnc_id:
            agent.db_refs['HGNC'] = hgnc_id
            agent.name = hgnc_client.get_hgnc_name(hgnc_id)
        else:
            gene_name = uniprot_client.get_gene_name(up_id, web_fallback=False)
            if gene_name:
                agent.name = gene_name
            # If it doesn't have a gene name, it's better to just
            # use the raw string name otherwise Sparser sets
            # has Uniprot IDs or mnemonics as the name
github sorgerlab / indra / indra / sources / trrust / processor.py View on Github external
def get_grounded_agent(gene_name):
    """Return a grounded Agent based on an HGNC symbol.

    Parameters
    ----------
    gene_name : str
        The gene symbol as it appears in the input. It is kept verbatim as
        the TEXT entry of the Agent's db_refs.

    Returns
    -------
    Agent
        An Agent named after the (possibly remapped) symbol. HGNC and UP
        entries are added to db_refs only when a lookup succeeds.
    """
    db_refs = {'TEXT': gene_name}
    # Apply the symbol override table before any lookup
    if gene_name in hgnc_map:
        gene_name = hgnc_map[gene_name]
    hgnc_id = hgnc_client.get_hgnc_id(gene_name)
    if not hgnc_id:
        hgnc_id = hgnc_client.get_current_hgnc_id(gene_name)
        # get_current_hgnc_id may return a list when a symbol maps to more
        # than one ID. A list is not a valid single grounding, so only
        # accept it when it is unambiguous.
        if isinstance(hgnc_id, list):
            hgnc_id = hgnc_id[0] if len(hgnc_id) == 1 else None
    if hgnc_id:
        db_refs['HGNC'] = hgnc_id
        up_id = hgnc_client.get_uniprot_id(hgnc_id)
        if up_id:
            db_refs['UP'] = up_id
    return Agent(gene_name, db_refs=db_refs)
github sorgerlab / indra / models / fallahi_eval / process_data.py View on Github external
def agent_from_gene_name(name):
    """Return a grounded Agent based on a gene name.

    Parameters
    ----------
    name : str
        The gene name to look up with hgnc_client.get_hgnc_id.

    Returns
    -------
    Agent
        An Agent with the given name. Its db_refs contain an HGNC entry
        only if the name resolved to an HGNC ID, and a UP entry only if
        that ID in turn resolved to a UniProt ID.
    """
    db_refs = {}
    hgnc_id = hgnc_client.get_hgnc_id(name)
    # Only look up UniProt when the HGNC lookup succeeded, and never store
    # empty groundings: a None value in db_refs looks like a grounding to
    # code that only checks for the presence of the key.
    if hgnc_id:
        db_refs['HGNC'] = hgnc_id
        uniprot_id = hgnc_client.get_uniprot_id(hgnc_id)
        if uniprot_id:
            db_refs['UP'] = uniprot_id
    return Agent(name, db_refs=db_refs)
github sorgerlab / indra / indra / db / query_db_stmts.py View on Github external
Optionally specify a database manager that attaches to something
        besides the primary database, for example a local database instance.

    Returns
    -------
    list of Statements from the database corresponding to the query.
    """
    if db is None:
        db = get_primary_db()

    if not (agent_id or role or stmt_type):
        raise ValueError('At least one of agent_id, role, or stmt_type '
                         'must be specified.')
    clauses = []
    if agent_id and agent_ns == 'HGNC':
        hgnc_id = hgnc_client.get_hgnc_id(agent_id)
        if not hgnc_id:
            logger.warning('Invalid gene name: %s' % agent_id)
            return []
        clauses.extend([db.Agents.db_name == 'HGNC',
                        db.Agents.db_id == hgnc_id])
    elif agent_id:
        clauses.extend([db.Agents.db_name == agent_ns,
                        db.Agents.db_id == agent_id])
    if role:
        clauses.append(db.Agents.role == role)
    if agent_id or role:
        clauses.append(db.Agents.stmt_id == db.Statements.id)
    if stmt_type:
        clauses.append(db.Statements.type == stmt_type)
    stmts = get_statements(clauses, count=count, do_stmt_count=do_stmt_count,
                           db=db)
github sorgerlab / indra / indra / sources / ctd / processor.py View on Github external
def get_gene_agent(name, gene_entrez_id):
    """Return an Agent for a gene given its name and Entrez gene ID.

    The Entrez ID is always stored under EGID. An HGNC entry is added when
    the name resolves through hgnc_client.get_hgnc_id. The references are
    then passed through standardize_name_db_refs, whose returned name
    replaces the input name when it is non-empty.
    """
    refs = {'EGID': gene_entrez_id}
    resolved_hgnc = hgnc_client.get_hgnc_id(name)
    if resolved_hgnc:
        refs['HGNC'] = resolved_hgnc
    standard_name, refs = standardize_name_db_refs(refs)
    # Fall back to the caller-supplied name if no standard name came back
    return Agent(standard_name if standard_name else name, db_refs=refs)
github sorgerlab / indra / indra / sources / trrust / processor.py View on Github external
def get_grounded_agent(gene_name):
    """Return a grounded Agent based on an HGNC symbol.

    Parameters
    ----------
    gene_name : str
        The gene symbol as it appears in the input. It is kept verbatim as
        the TEXT entry of the Agent's db_refs.

    Returns
    -------
    Agent
        An Agent named after the (possibly remapped) symbol. HGNC and UP
        entries are added to db_refs only when a lookup succeeds.
    """
    db_refs = {'TEXT': gene_name}
    # Apply the symbol override table before any lookup
    if gene_name in hgnc_map:
        gene_name = hgnc_map[gene_name]
    hgnc_id = hgnc_client.get_hgnc_id(gene_name)
    if not hgnc_id:
        hgnc_id = hgnc_client.get_current_hgnc_id(gene_name)
        # get_current_hgnc_id may return a list when a symbol maps to more
        # than one ID. A list is not a valid single grounding, so only
        # accept it when it is unambiguous.
        if isinstance(hgnc_id, list):
            hgnc_id = hgnc_id[0] if len(hgnc_id) == 1 else None
    if hgnc_id:
        db_refs['HGNC'] = hgnc_id
        up_id = hgnc_client.get_uniprot_id(hgnc_id)
        if up_id:
            db_refs['UP'] = up_id
    return Agent(gene_name, db_refs=db_refs)
github sorgerlab / indra / indra / preassembler / grounding_mapper / mapper.py View on Github external
def check_grounding_map(gm):
    """Sanity-check the HGNC entries of a grounding map.

    Parameters
    ----------
    gm : dict
        Maps each key to its db_refs mapping. Empty or missing mappings
        are skipped.

    Raises
    ------
    ValueError
        If an entry has an HGNC ID for which hgnc_client.get_hgnc_name
        returns None.
    """
    for text, grounding in gm.items():
        # Nothing to validate for ungrounded entries or ones without HGNC
        if not grounding or 'HGNC' not in grounding:
            continue
        if hgnc_client.get_hgnc_name(grounding['HGNC']) is None:
            raise ValueError('HGNC:%s for key %s in the grounding map is '
                             'not a valid ID' % (grounding['HGNC'], text))
github sorgerlab / indra / indra / resources / update_resources.py View on Github external
def update_kinases():
    """Download the kinase table from UniProt and add extra kinases to it.

    The UniProt query (InterPro IPR011009, reviewed, Homo sapiens) is saved
    as kinases.tsv under the module-level ``path``. A fixed list of
    additional gene symbols is then resolved to UniProt IDs and mnemonics
    via hgnc_client and uniprot_client, appended as rows, and the file is
    written back in place.
    """
    logger.info('--Updating kinase list------')
    url = 'http://www.uniprot.org/uniprot/?' + \
        'sort=entry_name&desc=no&compress=no&query=database:(type:' + \
        'interpro%20ipr011009)%20AND%20reviewed:yes%20AND%20organism:' + \
        '%22Homo%20sapiens%20(Human)%20[9606]%22&fil=&force=no' + \
        '&format=tab&columns=id,genes(PREFERRED),organism-id,entry%20name'
    fname = os.path.join(path, 'kinases.tsv')
    save_from_http(url, fname)

    from indra.databases import hgnc_client, uniprot_client
    add_kinases = ['PGK1', 'PKM', 'TAF1', 'NME1', 'BCKDK', 'PDK1', 'PDK2',
                   'PDK3', 'PDK4', 'BCR', 'FAM20C', 'BAZ1B', 'PIKFYVE']
    df = pandas.read_csv(fname, sep='\t')
    # Collect the extra rows first and concatenate once: DataFrame.append
    # was removed in pandas 2.0, and appending row by row copied the whole
    # frame on every iteration.
    extra_rows = []
    for kinase in add_kinases:
        hgnc_id = hgnc_client.get_hgnc_id(kinase)
        up_id = hgnc_client.get_uniprot_id(hgnc_id)
        up_mnemonic = uniprot_client.get_mnemonic(up_id)
        extra_rows.append({'Entry': up_id,
                           'Gene names  (primary )': kinase,
                           'Organism ID': '9606',
                           'Entry name': up_mnemonic})
    df = pandas.concat([df, pandas.DataFrame(extra_rows)], ignore_index=True)
    df.to_csv(fname, sep='\t', index=False)
github sorgerlab / indra / indra / sources / biopax / processor.py View on Github external
def sanitize_hgnc_ids(raw_hgnc_ids):
    """Return a de-duplicated list of HGNC IDs from raw identifiers.

    Each raw entry is interpreted, in order of preference, as a string
    starting with digits (the leading digits are the ID), as a
    case-insensitive ``hgnc:<digits>`` string, or otherwise as a symbol that
    is resolved with hgnc_client.get_current_hgnc_id. Symbols that resolve
    to several IDs contribute all of them; unresolved symbols are dropped.
    The order of the returned list is not defined.
    """
    collected = set()
    for raw in raw_hgnc_ids:
        # A leading run of digits wins over the prefixed form
        id_match = (re.match('([0-9]+)', raw)
                    or re.match('hgnc:([0-9]+)', raw.lower()))
        if id_match:
            collected.add(str(id_match.group(1)))
            continue
        # Not an ID, so treat the entry as a gene symbol
        resolved = hgnc_client.get_current_hgnc_id(raw)
        if isinstance(resolved, list):
            collected.update(resolved)
        elif resolved:
            collected.add(resolved)

    return list(collected)