@register_pipeline
def agent_name_matches(agent):
    """Return a sorted, normalized bag of words as the name."""
    if agent is None:
        return None
    bw = '_'.join(sorted(set(agent.name.lower().split())))
    return bw
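# Usage sketch (the agent is hypothetical; any indra.statements.Agent
# works): duplicated, unordered words collapse to a sorted,
# underscore-joined bag.
#
# >>> from indra.statements import Agent
# >>> agent_name_matches(Agent('Tumor necrosis factor tumor'))
# 'factor_necrosis_tumor'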
@register_pipeline
def run_preassembly(stmts_in, return_toplevel=True, poolsize=None,
                    size_cutoff=None, belief_scorer=None, ontology=None,
                    matches_fun=None, refinement_fun=None, refinement_ns=None,
                    flatten_evidence=False, flatten_evidence_collect_from=None,
                    normalize_equivalences=False, normalize_opposites=False,
                    normalize_ns='WM', **kwargs):
    """Run preassembly on a list of statements.

    Parameters
    ----------
    stmts_in : list[indra.statements.Statement]
        A list of statements to preassemble.
    return_toplevel : Optional[bool]
        If True, only the top-level statements are returned. If False,
        all statements are returned irrespective of level of specificity.
        Default: True
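# Usage sketch, assuming a hypothetical list `stmts` of INDRA Statements;
# the custom matches function passed here is the location/time-aware one
# defined further down in this module.
#
# >>> unique_stmts = run_preassembly(stmts, return_toplevel=True,
# ...                                matches_fun=location_time_matches)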
@register_pipeline
def map_grounding(stmts_in, do_rename=True, grounding_map=None,
                  misgrounding_map=None, agent_map=None, ignores=None,
                  use_adeft=True, gilda_mode=None,
                  grounding_map_policy='replace', **kwargs):
    """Map grounding using the GroundingMapper.

    Parameters
    ----------
    stmts_in : list[indra.statements.Statement]
        A list of statements to map.
    do_rename : Optional[bool]
        If True, Agents are renamed based on their mapped grounding.
    grounding_map : Optional[dict]
        A user supplied grounding map which maps a string to a
        dictionary of database IDs (in the format used by Agents'
        db_refs).
    misgrounding_map : Optional[dict]
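# Sketch of a user-supplied grounding map: keys are agent text strings,
# values are db_refs-style dictionaries of namespace/ID pairs (HGNC:11892
# and UP:P01375 are TNF's identifiers; `stmts` is a hypothetical list).
#
# >>> gm = {'TNF-alpha': {'HGNC': '11892', 'UP': 'P01375'}}
# >>> mapped_stmts = map_grounding(stmts, grounding_map=gm)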
@register_pipeline
def filter_direct(stmts_in, **kwargs):
    """Filter to statements that are direct interactions.

    Parameters
    ----------
    stmts_in : list[indra.statements.Statement]
        A list of statements to filter.
    save : Optional[str]
        The name of a pickle file to save the results (stmts_out) into.

    Returns
    -------
    stmts_out : list[indra.statements.Statement]
        A list of filtered statements.
    """
    def get_is_direct(stmt):
        # Direct if any evidence is explicitly marked direct; indirect
        # only if some evidence is explicitly marked indirect.
        any_indirect = False
        for ev in stmt.evidence:
            if ev.epistemics.get('direct') is True:
                return True
            elif ev.epistemics.get('direct') is False:
                any_indirect = True
        return not any_indirect

    # Minimal completion of the truncated excerpt; the 'save' kwarg
    # (pickling stmts_out) is not handled here.
    stmts_out = [st for st in stmts_in if get_is_direct(st)]
    return stmts_out
@register_pipeline
def filter_inconsequential_mods(stmts_in, whitelist=None, **kwargs):
    """Filter out Modifications that modify inconsequential sites.

    Inconsequential here means that the site is not mentioned / tested
    in any other statement. In some cases specific sites should be
    preserved, for instance, to be used as readouts in a model.
    In this case, the given sites can be passed in a whitelist.

    Parameters
    ----------
    stmts_in : list[indra.statements.Statement]
        A list of statements to filter.
    whitelist : Optional[dict]
        A whitelist containing agent modification sites whose
        modifications should be preserved even if no other statement
        refers to them. The whitelist parameter is a dictionary in which
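# Whitelist format sketch: Agent names map to lists of
# (modification_type, residue, position) tuples, e.g. to preserve MAP2K1
# phosphorylation at S222 as a model readout:
#
# >>> wl = {'MAP2K1': [('phosphorylation', 'S', '222')]}
# >>> stmts_out = filter_inconsequential_mods(stmts, whitelist=wl)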
@register_pipeline
def strip_agent_context(stmts_in, **kwargs):
    """Strip any context on agents within each statement.

    Parameters
    ----------
    stmts_in : list[indra.statements.Statement]
        A list of statements whose agent context should be stripped.
    save : Optional[str]
        The name of a pickle file to save the results (stmts_out) into.

    Returns
    -------
    stmts_out : list[indra.statements.Statement]
        A list of stripped statements.
    """
    logger.info('Stripping agent context on %d statements...' % len(stmts_in))
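# What "stripping" means per agent: clearing the context attributes of
# indra.statements.Agent. A minimal sketch, not necessarily the exact
# body of this function:
#
# >>> for agent in stmt.agent_list():
# ...     if agent is None:
# ...         continue
# ...     agent.mods = []
# ...     agent.mutations = []
# ...     agent.activity = None
# ...     agent.location = None
# ...     agent.bound_conditions = []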
@register_pipeline
def location_time_matches(stmt):
    """Return matches key which takes location and time into account."""
    if isinstance(stmt, Event):
        return event_location_time_matches(stmt)
    elif isinstance(stmt, Influence):
        subj_mk = event_location_time_matches(stmt.subj)
        obj_mk = event_location_time_matches(stmt.obj)
        return str((stmt.matches_key(), subj_mk, obj_mk))
    else:
        return stmt.matches_key()
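# event_location_time_matches is referenced above but not shown in this
# excerpt; below is a hypothetical sketch of its time handling, assuming
# indra.statements.TimeContext attributes (the real helper presumably
# also folds in the event's geolocation, per its name).

def _event_location_time_matches_sketch(event):
    # Extend the plain matches key with start/end/duration when the event
    # carries a time context.
    mk = event.matches_key()
    if event.context and event.context.time:
        time = event.context.time
        mk = str((mk, time.start, time.end, time.duration))
    return mk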
import logging

from indra.pipeline import register_pipeline
from indra.tools.assemble_corpus import filter_grounded_only
from indra_db.client.principal.curation import get_curations

from emmaa.priors import SearchTerm
from emmaa.readers.aws_reader import read_pmid_search_terms
from emmaa.readers.db_client_reader import read_db_pmid_search_terms, \
    read_db_doi_search_terms
from emmaa.readers.elsevier_eidos_reader import \
    read_elsevier_eidos_search_terms
from emmaa.util import make_date_str, find_latest_s3_file, strip_out_date, \
    EMMAA_BUCKET_NAME, find_nth_latest_s3_file, load_pickle_from_s3, \
    save_pickle_to_s3, load_json_from_s3, save_json_to_s3
from emmaa.statements import to_emmaa_stmts

logger = logging.getLogger(__name__)
register_pipeline(get_curations)
class EmmaaModel(object):
    """Represents an EMMAA model.

    Parameters
    ----------
    name : str
        The name of the model.
    config : dict
        A configuration dict that is typically loaded from a YAML file.

    Attributes
    ----------
    name : str
        A string containing the name of the model
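# Instantiation sketch mirroring the Parameters above; the config dict is
# hypothetical and far smaller than a real EMMAA config (which is loaded
# from YAML).
#
# >>> config = {'search_terms': [], 'assembly': []}
# >>> model = EmmaaModel('covid19', config)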
@register_pipeline
def filter_eidos_ungrounded(stmts):
    """Filter out statements from Eidos with ungrounded agents."""
    logger.info(
        'Filtering out ungrounded Eidos statements from %d statements...'
        % len(stmts))
    stmts_out = []
    eidos_stmts = []
    for stmt in stmts:
        if stmt.evidence[0].source_api == 'eidos':
            eidos_stmts.append(stmt)
        else:
            stmts_out.append(stmt)
    eidos_grounded = filter_grounded_only(eidos_stmts)
    stmts_out += eidos_grounded
    logger.info('%d statements after filter...' % len(stmts_out))
    return stmts_out
@register_pipeline
def filter_genes_only(stmts_in, specific_only=False, remove_bound=False,
                      **kwargs):
    """Filter to statements containing genes only.

    Parameters
    ----------
    stmts_in : list[indra.statements.Statement]
        A list of statements to filter.
    specific_only : Optional[bool]
        If True, only elementary genes/proteins will be kept and families
        will be filtered out. If False, families are also included in the
        output. Default: False
    save : Optional[str]
        The name of a pickle file to save the results (stmts_out) into.
    remove_bound : Optional[bool]
        If True, removes bound conditions that are not genes
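# Usage sketch: keep only statements whose agents are specific
# genes/proteins, dropping families and non-gene bound conditions
# (`stmts` is a hypothetical statement list).
#
# >>> gene_stmts = filter_genes_only(stmts, specific_only=True,
# ...                                remove_bound=True)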