Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def get_drug_targets(fname=None):
    """Return a mapping from drug abbreviation to its target gene names.

    Parameters
    ----------
    fname : Optional[str]
        Path to a header-less, UTF-8 encoded CSV file. If omitted (or
        otherwise falsy), the module-level ``drug_grounding_file`` is used.

    Returns
    -------
    dict
        Keys are the values found in column 1 of the file (the drug
        abbreviations). Each value is a list holding the result of
        ``uniprot_client.get_gene_name`` for every comma-separated ID in
        column 6 of the same row. Rows whose column 6 is empty map to an
        empty list.
    """
    if not fname:
        fname = drug_grounding_file
    df = pandas.read_csv(fname, index_col=None, header=None, encoding='utf-8')
    targets = {}
    for abb, tupid in zip(df[1], df[6]):
        # pandas represents an empty cell as NaN (a float), which has no
        # .split(); treat a missing target list as "no targets" instead of
        # crashing on the first such row.
        if pandas.isnull(tupid):
            targets[abb] = []
            continue
        targets[abb] = [uniprot_client.get_gene_name(ui)
                        for ui in tupid.split(',')]
    return targets
participant['entity_type'] = 'chemical'
elif pfam_def_ids:
participant['entity_type'] = 'protein_family'
participant['entities'] = []
pfam_def_list = []
for p in pfam_def_ids.split('|'):
dbname, dbid = p.split(':')
pfam_def_list.append({dbname: dbid})
for pdi in pfam_def_list:
# TODO: handle non-uniprot protein IDs here
uniprot_id = pdi.get('UP')
if uniprot_id:
entity_dict = {}
uniprot_mnemonic = \
str(uniprot_client.get_mnemonic(uniprot_id))
gene_name = uniprot_client.get_gene_name(uniprot_id)
if gene_name is None:
gene_name = ""
entity_dict['entity_text'] = [gene_name]
entity_dict['identifier'] = 'UNIPROT:%s' % uniprot_mnemonic
entity_dict['entity_type'] = 'protein'
participant['entities'].append(entity_dict)
else:
participant['identifier'] = ''
participant['entity_type'] = 'protein'
features = []
not_features = []
# Binding features
for bc in agent.bound_conditions:
feature = {
'feature_type': 'binding_feature',
def read_phosphosite(fname=phosphosite_file):
df = pandas.read_csv(fname, index_col=None, sep='\t', encoding='utf8')
statements = []
for _, row in df.iterrows():
sub_upid = row['SUB_ID']
if not pandas.isnull(sub_upid):
if sub_upid.find('-') != -1:
sub_upid = sub_upid.split('-')[0]
sub_hgnc_symbol = uniprot_client.get_gene_name(sub_upid)
sub_hgnc = hgnc_client.get_hgnc_id(sub_hgnc_symbol)
else:
sub_hgnc_symbol = row['SUB_GENE']
sub_hgnc_id = hgnc_client.get_hgnc_id(sub_hgnc_symbol)
sub_upid = hgnc_client.get_uniprot_id(sub_hgnc_id)
if sub_upid is None:
continue
sub = Agent(sub_hgnc_symbol,
db_refs={'UP': sub_upid,'HGNC': sub_hgnc})
residue = row['Actual_site'][0]
if len(row['Actual_site']) > 1:
position = row['Actual_site'][1:]
else:
position = None
ps = row['phosphosite']
gene_name = hgnc_client.get_hgnc_name(hgnc_id)
if gene_name:
agent.name = gene_name
if not up_id:
up_id = hgnc_client.get_uniprot_id(hgnc_id)
if up_id:
if ', ' in up_id:
up_id = up_id.split(', ')[0]
agent.db_refs['UP'] = up_id
elif up_id:
hgnc_id = uniprot_client.get_hgnc_id(up_id)
if hgnc_id:
agent.db_refs['HGNC'] = hgnc_id
agent.name = hgnc_client.get_hgnc_name(hgnc_id)
else:
gene_name = uniprot_client.get_gene_name(up_id, web_fallback=False)
if gene_name:
agent.name = gene_name
# If it doesn't have a gene name, it's better to just
# use the raw string name otherwise Sparser sets
# has Uniprot IDs or mnemonics as the name
else:
name = agent.db_refs.get('TEXT', agent.name)
agent.name = name
up_id, feat_id = db_id.split('-')
# Assume it's a feature ID
assert feat_id.startswith('PRO'), feat_id
db_refs = {'UP': up_id, 'UPPRO': feat_id}
else:
db_refs = {'UP': db_id}
elif db_ns == 'refseq':
db_refs = {'REFSEQ_PROT': db_id}
else:
db_refs = {'GENBANK': db_id}
agent = Agent(db_id, db_refs=db_refs)
standardized = standardize_agent_name(agent)
if up_web_fallback:
# Handle special case of unreviewed UP entries
if not standardized and 'UP' in db_refs:
name = uniprot_client.get_gene_name(db_refs['UP'],
web_fallback=True)
if name:
agent.name = name
return agent
for ev in stmt.evidence:
if ev.pmid and ev.pmid.startswith('PMID'):
ev.pmid = ev.pmid[:-4]
# Skip if no subject
if isinstance(stmt, RegulateActivity):
if stmt.subj is None:
continue
# Skip if no locations
if isinstance(stmt, Translocation):
if not (stmt.from_location or stmt.to_location):
continue
for agent in stmt.agent_list():
if agent is not None:
upid = agent.db_refs.get('UP')
if upid:
gene_name = uniprot_client.get_gene_name(upid)
if gene_name:
agent.name = gene_name
if uniprot_client.is_human(upid):
hgnc_id = hgnc_client.get_hgnc_id(gene_name)
if hgnc_id:
agent.db_refs['HGNC'] = hgnc_id
new_stmts.append(stmt)
return new_stmts
# condition
agent = agents[0]
agent.bound_conditions = \
[BoundCondition(a, True) for a in agents[1:]]
return agent
else:
gnd_type = _type_db_map[(ent_type, database)]
if gnd_type == 'UP':
up_id = id
db_refs = {'UP': up_id}
hgnc_id = uniprot_client.get_hgnc_id(up_id)
if hgnc_id:
db_refs['HGNC'] = hgnc_id
name = hgnc_client.get_hgnc_name(hgnc_id)
else:
name = uniprot_client.get_gene_name(up_id)
# Map SIGNOR protein families to FamPlex families
elif ent_type == 'proteinfamily':
db_refs = {database: id} # Keep the SIGNOR family ID in db_refs
key = (database, id)
# Use SIGNOR name unless we have a mapping in FamPlex
name = ent_name
famplex_id = famplex_map.get(key)
if famplex_id is None:
logger.info('Could not find %s in FamPlex map' %
str(key))
else:
db_refs['FPLX'] = famplex_id
name = famplex_id
# Other possible groundings are PUBCHEM, SIGNOR, etc.
elif gnd_type is not None:
if database not in ('PUBCHEM', 'SIGNOR', 'ChEBI', 'miRBase',
def _get_complex_agents(self, complex_id):
"""Returns a list of agents corresponding to each of the constituents
in a SIGNOR complex."""
agents = []
components = self._recursively_lookup_complex(complex_id)
for c in components:
db_refs = {}
name = uniprot_client.get_gene_name(c)
if name is None:
db_refs['SIGNOR'] = c
else:
db_refs['UP'] = c
hgnc_id = uniprot_client.get_hgnc_id(c)
if hgnc_id:
name = hgnc_client.get_hgnc_name(hgnc_id)
db_refs['HGNC'] = hgnc_id
famplex_key = ('SIGNOR', c)
if famplex_key in famplex_map:
db_refs['FPLX'] = famplex_map[famplex_key]
if not name:
name = db_refs['FPLX'] # Set agent name to Famplex name if
# the Uniprot name is not available
elif not name:
# If 'UP' (Uniprot) not one of the grounding entries for this text,
# then we skip it.
if 'UP' not in [entry[0] for entry in grounding_list]:
continue
# Otherwise, collect all the Uniprot IDs for this protein.
uniprot_ids = [entry[1] for entry in grounding_list
if entry[0] == 'UP']
# For each Uniprot ID, look up the species
for uniprot_id in uniprot_ids:
# If it's not a human protein, skip it
mnemonic = uniprot_client.get_mnemonic(uniprot_id)
if mnemonic is None or not mnemonic.endswith('_HUMAN'):
continue
# Otherwise, look up the gene name in HGNC and match against the
# agent text
gene_name = uniprot_client.get_gene_name(uniprot_id)
if gene_name is None:
unmatched += 1
continue
if agent_text.upper() == gene_name.upper():
matched += 1
protein_map[agent_text] = {'TEXT': agent_text,
'UP': uniprot_id}
else:
unmatched += 1
logger.info('Exact matches for %d proteins' % matched)
logger.info('No match (or no gene name) for %d proteins' % unmatched)
return protein_map