# Read in the original text; the file handle here comes from an enclosing
# `with open(...)` block that is not shown in this snippet
orig_txt = [ln.strip() for ln in fh.readlines()]
# Read the correction file
with open(fname, 'rt') as fh:
    correct_txt = [ln.strip() for ln in fh.readlines()]
# Apply diff-style corrections: lines starting with '<' are removed from the
# original text, lines starting with '>' are appended to it
for ln in correct_txt:
    if ln.startswith('<'):
        remove_line = ln[2:]
        orig_txt.remove(remove_line)
    elif ln.startswith('>'):
        add_line = ln[2:]
        orig_txt.append(add_line)
txt = '\n'.join(orig_txt)
# Re-read the corrected text with the chosen reading system, then dump and plot
if reader == 'reach':
    stmts = process_reach(txt)
elif reader == 'trips':
    stmts = process_trips(txt, reread=True)
ac.dump_statements(stmts, 'ras_pathway_correction.pkl')
draw_graph(stmts, 'ras_pathway_correction')
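A minimal follow-up sketch, assuming the pickle written above: the dumped Statements can be read back later with the matching assemble_corpus loader.

stmts = ac.load_statements('ras_pathway_correction.pkl')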
def filter(stmts, cutoff, filename):
    # Keep only Statements above the belief cutoff that are top-level and
    # direct, then dump the result to a pickle
    stmts = ac.filter_belief(stmts, cutoff)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_direct(stmts)
    #stmts = ac.filter_enzyme_kinase(stmts)
    ac.dump_statements(stmts, filename)
    return stmts
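A hedged usage sketch of the helper above; the 0.95 cutoff and the output file name are illustrative assumptions, not taken from the snippet.

filtered_stmts = filter(stmts, 0.95, 'filtered_stmts.pkl')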
else:
    assemble_models = sys.argv[1:]
print('Assembling the following model types: %s' %
      ', '.join(assemble_models))
print('##############')
outf = 'output/'
data = process_data.read_data(process_data.data_file)
data_genes = process_data.get_all_gene_names(data)
reassemble = False
if not reassemble:
    stmts = ac.load_statements(pjoin(outf, 'preassembled.pkl'))
else:
    # Load and ground the prior Statements
    #prior_stmts = build_prior(data_genes, pjoin(outf, 'prior.pkl'))
    prior_stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
    prior_stmts = ac.map_grounding(prior_stmts,
                                   save=pjoin(outf, 'gmapped_prior.pkl'))
    # Load reading-derived Statements and drop hypothetical ones
    reach_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl'))
    reach_stmts = ac.filter_no_hypothesis(reach_stmts)
    #extra_stmts = ac.load_statements(pjoin(outf, 'extra_stmts.pkl'))
    extra_stmts = read_extra_sources(pjoin(outf, 'extra_stmts.pkl'))
    reading_stmts = reach_stmts + extra_stmts
    reading_stmts = ac.map_grounding(reading_stmts,
                                     save=pjoin(outf, 'gmapped_reading.pkl'))
    # extra_stmts is already included in reading_stmts, so it is not added again
    stmts = prior_stmts + reading_stmts
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_genes_only(stmts, specific_only=False)
    stmts = ac.filter_human_only(stmts)
    stmts = ac.expand_families(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'one')
    stmts = ac.map_sequence(stmts, save=pjoin(outf, 'smapped.pkl'))
    #stmts = ac.load_statements(pjoin(outf, 'smapped.pkl'))
    stmts = ac.run_preassembly(stmts, return_toplevel=False,
                               save=pjoin(outf, 'preassembled.pkl'),
                               poolsize=4)
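The 'one' policy in ac.filter_gene_list above keeps a Statement if at least one of its agents is in data_genes, whereas the 'all' policy used in the preprocessing function below requires every agent to match. A minimal sketch with an assumed two-gene list:

stmts = ac.filter_gene_list(stmts, ['BRAF', 'MAP2K1'], 'one')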
def preprocess_stmts(stmts, data_genes):
    # Requires: from indra.statements import Complex, ActiveForm
    #           from indra.mechlinker import MechLinker
    # Filter the INDRA Statements to be put into the model
    stmts = ac.filter_mutation_status(stmts,
                                      {'BRAF': [('V', '600', 'E')]}, ['PTEN'])
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.filter_enzyme_kinase(stmts)
    stmts = ac.filter_mod_nokinase(stmts)
    stmts = ac.filter_transcription_factor(stmts)
    # Simplify activity types
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.reduce_activities()
    ml.gather_modifications()
    ml.reduce_modifications()
    # Re-run preassembly on ActiveForm Statements only, then recombine
    af_stmts = ac.filter_by_type(ml.statements, ActiveForm)
    non_af_stmts = ac.filter_by_type(ml.statements, ActiveForm, invert=True)
    af_stmts = ac.run_preassembly(af_stmts)
    stmts = af_stmts + non_af_stmts
    return stmts

# Fragment of a class method: `self` is the model object that stores the
# assembled Statements
stmts = ac.filter_no_hypothesis(stmts)
# Fix grounding
if grounding_map is not None:
    stmts = ac.map_grounding(stmts, grounding_map=grounding_map)
else:
    stmts = ac.map_grounding(stmts)
if filters and ('grounding' in filters):
    stmts = ac.filter_grounded_only(stmts)
# Fix sites
stmts = ac.map_sequence(stmts)
if filters and 'human_only' in filters:
    stmts = ac.filter_human_only(stmts)
# Run preassembly
stmts = ac.run_preassembly(stmts, return_toplevel=False)
# Run relevance filter
stmts = self._relevance_filter(stmts, filters)
# Save Statements
self.assembled_stmts = stmts
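Assuming the grounding map format used by INDRA's grounding mapper (agent text keyed to namespace/ID pairs; the entries below are illustrative only), a custom map can be supplied through the same keyword argument shown above:

grounding_map = {'ERK': {'FPLX': 'ERK'},
                 'AKT': {'FPLX': 'AKT'}}
stmts = ac.map_grounding(stmts, grounding_map=grounding_map)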
# Old pickle files can have missing Statement UUIDs; to avoid propagating
# this, we set UUIDs here (requires: import uuid)
for stmt in stmts:
    try:
        stmt.uuid
    except AttributeError:
        stmt.uuid = str(uuid.uuid4())
def process_statements(stmts, num_procs=1):
    # Standard cleanup pipeline: fix grounding and site positions, then
    # run preassembly across num_procs worker processes
    stmts = ac.map_grounding(stmts)
    stmts = ac.map_sequence(stmts)
    stmts = ac.run_preassembly(stmts, return_toplevel=False,
                               poolsize=num_procs)
    return stmts
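A hedged usage sketch of the function above; the pool size of 4 is an arbitrary illustration, and raw_stmts stands for any list of INDRA Statements.

processed_stmts = process_statements(raw_stmts, num_procs=4)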
def filter_grounded_only():
    """Filter to grounded Statements only."""
    # Answer CORS preflight requests with an empty body
    if request.method == 'OPTIONS':
        return {}
    # Parse the JSON request body
    response = request.body.read().decode('utf-8')
    body = json.loads(response)
    stmts_json = body.get('statements')
    score_threshold = body.get('score_threshold')
    if score_threshold is not None:
        score_threshold = float(score_threshold)
    # Deserialize, filter, and return the Statements
    stmts = stmts_from_json(stmts_json)
    stmts_out = ac.filter_grounded_only(stmts, score_threshold=score_threshold)
    return _return_stmts(stmts_out)
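A sketch of calling this endpoint from a client; the route and host below are assumptions for illustration, but the JSON keys match the ones read by the handler above.

import requests
resp = requests.post('http://localhost:8080/preassembly/filter_grounded_only',
                     json={'statements': stmts_json, 'score_threshold': 0.7})
filtered = resp.json()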
gn = GeneNetwork(gene_names, basen)
# Read BEL Statements
bel_stmts = gn.get_bel_stmts(filter=False)
ac.dump_statements(bel_stmts, prefixed_pkl('bel'))
# Read Pathway Commons Statements
database_filter = ['reactome', 'kegg', 'pid']
biopax_stmts = gn.get_biopax_stmts(database_filter=database_filter)
# Eliminate blacklisted interactions
tmp_stmts = []
for stmt in biopax_stmts:
    source_ids = [ev.source_id for ev in stmt.evidence]
    if set(source_ids) & set(biopax_blacklist):
        continue
    tmp_stmts.append(stmt)
biopax_stmts = tmp_stmts
ac.dump_statements(biopax_stmts, prefixed_pkl('biopax'))
# Read Phosphosite Statements
phosphosite_stmts = read_phosphosite_owl(phosphosite_owl_file)
ac.dump_statements(phosphosite_stmts, prefixed_pkl('phosphosite'))
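As a closing sketch, the three dumped sources could be combined and preassembled with the same assemble_corpus helpers used throughout; the combination step and the 'combined' pickle name are assumptions, not part of the snippet above.

all_stmts = bel_stmts + biopax_stmts + phosphosite_stmts
all_stmts = ac.map_grounding(all_stmts)
all_stmts = ac.map_sequence(all_stmts)
all_stmts = ac.run_preassembly(all_stmts, return_toplevel=False)
ac.dump_statements(all_stmts, prefixed_pkl('combined'))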