Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# db_refs['UP'] is an empty string, and there is no other grounding.
# In this case, we remove the empty UP grounding and reset the name to the
# agent text.
if not be_id and not hgnc_id and up_id == '':
agent.name = agent.db_refs.get('TEXT', agent.name)
agent.db_refs.pop('UP')
# FPLX takes precedence if we have it
elif be_id:
agent.db_refs['FPLX'] = be_id
agent.name = be_id
elif hgnc_id:
gene_name = hgnc_client.get_hgnc_name(hgnc_id)
if gene_name:
agent.name = gene_name
if not up_id:
up_id = hgnc_client.get_uniprot_id(hgnc_id)
if up_id:
if ', ' in up_id:
up_id = up_id.split(', ')[0]
agent.db_refs['UP'] = up_id
elif up_id:
hgnc_id = uniprot_client.get_hgnc_id(up_id)
if hgnc_id:
agent.db_refs['HGNC'] = hgnc_id
agent.name = hgnc_client.get_hgnc_name(hgnc_id)
else:
gene_name = uniprot_client.get_gene_name(up_id, web_fallback=False)
if gene_name:
agent.name = gene_name
# If it doesn't have a gene name, it's better to just
# use the raw string name; otherwise Sparser sets
# UniProt IDs or mnemonics as the name
def get_grounded_agent(gene_name):
    """Return a grounded Agent based on an HGNC symbol.

    Parameters
    ----------
    gene_name : str
        The gene symbol as it appeared in the source. It is always kept as
        the 'TEXT' entry of the returned Agent's db_refs.

    Returns
    -------
    Agent
        An Agent named after the (possibly remapped) symbol. 'HGNC' is added
        to db_refs when a single HGNC ID could be resolved, and 'UP' when a
        UniProt ID is found for that HGNC ID.
    """
    db_refs = {'TEXT': gene_name}
    # Replace the symbol with its override from hgnc_map when there is one.
    if gene_name in hgnc_map:
        gene_name = hgnc_map[gene_name]
    hgnc_id = hgnc_client.get_hgnc_id(gene_name)
    if not hgnc_id:
        hgnc_id = hgnc_client.get_current_hgnc_id(gene_name)
        # NOTE(review): get_current_hgnc_id appears to return a list when a
        # symbol maps to several IDs (other callers check for a list) --
        # confirm. A list is not a usable grounding, so keep the ID only
        # when it is unambiguous.
        if isinstance(hgnc_id, list):
            hgnc_id = hgnc_id[0] if len(hgnc_id) == 1 else None
    if hgnc_id:
        db_refs['HGNC'] = hgnc_id
        up_id = hgnc_client.get_uniprot_id(hgnc_id)
        if up_id:
            db_refs['UP'] = up_id
    agent = Agent(gene_name, db_refs=db_refs)
    return agent
def agent_from_gene_name(name):
    """Return a grounded Agent based on a gene name.

    Parameters
    ----------
    name : str
        The gene name, used both as the Agent's name and as the key for the
        HGNC ID lookup.

    Returns
    -------
    Agent
        An Agent whose db_refs holds 'HGNC' and 'UP' entries for the
        identifiers that could be looked up. Identifiers that are not found
        are left out rather than stored as None.
    """
    agent = Agent(name)
    db_refs = {}
    hgnc_id = hgnc_client.get_hgnc_id(name)
    if hgnc_id:
        db_refs['HGNC'] = hgnc_id
        # Only query UniProt once we actually have an HGNC ID to look up.
        uniprot_id = hgnc_client.get_uniprot_id(hgnc_id)
        if uniprot_id:
            db_refs['UP'] = uniprot_id
    agent.db_refs = db_refs
    return agent
Optionally specify a database manager that attaches to something
besides the primary database, for example a local database instance.
Returns
-------
list of Statements from the database corresponding to the query.
"""
if db is None:
db = get_primary_db()
if not (agent_id or role or stmt_type):
raise ValueError('At least one of agent_id, role, or stmt_type '
'must be specified.')
clauses = []
if agent_id and agent_ns == 'HGNC':
hgnc_id = hgnc_client.get_hgnc_id(agent_id)
if not hgnc_id:
logger.warning('Invalid gene name: %s' % agent_id)
return []
clauses.extend([db.Agents.db_name == 'HGNC',
db.Agents.db_id == hgnc_id])
elif agent_id:
clauses.extend([db.Agents.db_name == agent_ns,
db.Agents.db_id == agent_id])
if role:
clauses.append(db.Agents.role == role)
if agent_id or role:
clauses.append(db.Agents.stmt_id == db.Statements.id)
if stmt_type:
clauses.append(db.Statements.type == stmt_type)
stmts = get_statements(clauses, count=count, do_stmt_count=do_stmt_count,
db=db)
def get_gene_agent(name, gene_entrez_id):
    """Return an Agent for a gene given its name and Entrez gene ID.

    The Entrez ID is stored under 'EGID'. If an HGNC ID is found for the
    name, it is stored under 'HGNC'. The references are then passed through
    standardize_name_db_refs, and the standard name it returns, if any,
    replaces the given name.
    """
    grounding = {'EGID': gene_entrez_id}
    hgnc_id = hgnc_client.get_hgnc_id(name)
    if hgnc_id:
        grounding['HGNC'] = hgnc_id
    standard_name, grounding = standardize_name_db_refs(grounding)
    # Fall back to the caller-supplied name when no standard name exists.
    agent_name = standard_name if standard_name else name
    return Agent(agent_name, db_refs=grounding)
def get_grounded_agent(gene_name):
    """Return a grounded Agent based on an HGNC symbol.

    Parameters
    ----------
    gene_name : str
        The gene symbol as it appeared in the source. It is always kept as
        the 'TEXT' entry of the returned Agent's db_refs.

    Returns
    -------
    Agent
        An Agent named after the (possibly remapped) symbol. 'HGNC' is added
        to db_refs when a single HGNC ID could be resolved, and 'UP' when a
        UniProt ID is found for that HGNC ID.
    """
    db_refs = {'TEXT': gene_name}
    # Replace the symbol with its override from hgnc_map when there is one.
    if gene_name in hgnc_map:
        gene_name = hgnc_map[gene_name]
    hgnc_id = hgnc_client.get_hgnc_id(gene_name)
    if not hgnc_id:
        hgnc_id = hgnc_client.get_current_hgnc_id(gene_name)
        # NOTE(review): get_current_hgnc_id appears to return a list when a
        # symbol maps to several IDs (other callers check for a list) --
        # confirm. A list is not a usable grounding, so keep the ID only
        # when it is unambiguous.
        if isinstance(hgnc_id, list):
            hgnc_id = hgnc_id[0] if len(hgnc_id) == 1 else None
    if hgnc_id:
        db_refs['HGNC'] = hgnc_id
        up_id = hgnc_client.get_uniprot_id(hgnc_id)
        if up_id:
            db_refs['UP'] = up_id
    agent = Agent(gene_name, db_refs=db_refs)
    return agent
def check_grounding_map(gm):
    """Run sanity checks on the grounding map, raise error if needed.

    Parameters
    ----------
    gm : dict
        Grounding map from a key to a dict of references. Entries whose
        references are empty or None are skipped.

    Raises
    ------
    ValueError
        If an entry has an 'HGNC' reference for which the HGNC client
        returns no name.
    """
    for text, db_refs in gm.items():
        # Only entries that carry an HGNC reference need to be validated.
        if db_refs and 'HGNC' in db_refs:
            hgnc_id = db_refs['HGNC']
            if hgnc_client.get_hgnc_name(hgnc_id) is None:
                raise ValueError('HGNC:%s for key %s in the grounding map is '
                                 'not a valid ID' % (hgnc_id, text))
def update_kinases():
    """Download the kinase table and add a fixed set of extra kinases.

    The table is fetched from UniProt into 'kinases.tsv' under the resource
    path. One row is then added for each gene symbol in a hard-coded list,
    and the file is written back in place.
    """
    logger.info('--Updating kinase list------')
    url = 'http://www.uniprot.org/uniprot/?' + \
        'sort=entry_name&desc=no&compress=no&query=database:(type:' + \
        'interpro%20ipr011009)%20AND%20reviewed:yes%20AND%20organism:' + \
        '%22Homo%20sapiens%20(Human)%20[9606]%22&fil=&force=no' + \
        '&format=tab&columns=id,genes(PREFERRED),organism-id,entry%20name'
    fname = os.path.join(path, 'kinases.tsv')
    save_from_http(url, fname)

    from indra.databases import hgnc_client, uniprot_client
    add_kinases = ['PGK1', 'PKM', 'TAF1', 'NME1', 'BCKDK', 'PDK1', 'PDK2',
                   'PDK3', 'PDK4', 'BCR', 'FAM20C', 'BAZ1B', 'PIKFYVE']
    df = pandas.read_csv(fname, sep='\t')
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call, so collect the extra rows and concatenate them once.
    extra_rows = []
    for kinase in add_kinases:
        hgnc_id = hgnc_client.get_hgnc_id(kinase)
        up_id = hgnc_client.get_uniprot_id(hgnc_id)
        up_mnemonic = uniprot_client.get_mnemonic(up_id)
        extra_rows.append({'Entry': up_id, 'Gene names (primary )': kinase,
                           'Organism ID': '9606', 'Entry name': up_mnemonic})
    df = pandas.concat([df, pandas.DataFrame(extra_rows)], ignore_index=True)
    df.to_csv(fname, sep='\t', index=False)
def sanitize_hgnc_ids(raw_hgnc_ids):
    """Return a de-duplicated list of HGNC IDs from mixed IDs and symbols.

    Parameters
    ----------
    raw_hgnc_ids : iterable of str
        Each entry is either a bare numeric ID ('1097'), an ID with an
        'hgnc:' prefix in any letter case ('HGNC:1097'), or otherwise is
        assumed to be a gene symbol.

    Returns
    -------
    list of str
        The unique HGNC IDs, in no particular order. Symbols for which the
        lookup returns nothing are dropped.
    """
    # First we get a list of primary IDs
    hgnc_ids = set()
    for raw_hgnc_id in raw_hgnc_ids:
        token = raw_hgnc_id.strip()
        # Check if it's an ID first. The whole token has to match, so that
        # something like '123abc' is not truncated to the ID '123'.
        id_match = re.match(r'(?:hgnc:)?([0-9]+)\Z', token, re.IGNORECASE)
        if id_match:
            hgnc_ids.add(id_match.group(1))
            continue
        # If not, we assume it's a symbol
        hgnc_id = hgnc_client.get_current_hgnc_id(token)
        if isinstance(hgnc_id, list):
            # The lookup returned several IDs for this symbol; keep them all.
            hgnc_ids |= set(hgnc_id)
        elif hgnc_id:
            hgnc_ids.add(hgnc_id)
    return list(hgnc_ids)