How to use the functions of the indra.tools.assemble_corpus module in indra

To help you get started, we’ve selected a few examples of indra.tools.assemble_corpus, based on popular ways this module is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github sorgerlab / indra / models / ras_pathway / run_ras_pathway.py View on Github external
orig_txt = [ln.strip() for ln in fh.readlines()]
    with open(fname, 'rt') as fh:
        correct_txt = [ln.strip() for ln in fh.readlines()]
    for ln in correct_txt:
        if ln.startswith('<'):
            remove_line = ln[2:]
            orig_txt.remove(remove_line)
        elif ln.startswith('>'):
            add_line = ln[2:]
            orig_txt.append(add_line)
    txt = '\n'.join(orig_txt)
    if reader == 'reach':
        stmts = process_reach(txt)
    elif reader == 'trips':
        stmts = process_trips(txt, reread=True)
    ac.dump_statements(stmts, 'ras_pathway_correction.pkl')
    draw_graph(stmts, 'ras_pathway_correction')
github sorgerlab / indra / models / ndex_networks / palb2 / assemble_model.py View on Github external
def filter(stmts, cutoff, filename):
    """Narrow down statements, dump them to a file, and return them.

    The statements are passed, in order, through ``ac.filter_belief``
    (called with ``cutoff``), ``ac.filter_top_level`` and
    ``ac.filter_direct``. The surviving statements are written with
    ``ac.dump_statements`` to ``filename`` and then returned.
    """
    by_belief = ac.filter_belief(stmts, cutoff)
    top_level = ac.filter_top_level(by_belief)
    kept = ac.filter_direct(top_level)
    # NOTE: ac.filter_enzyme_kinase is not applied here; that step was
    # commented out in the pipeline and stays disabled.
    ac.dump_statements(kept, filename)
    return kept
github sorgerlab / indra / models / phase3_eval / assemble_models.py View on Github external
else:
            assemble_models = sys.argv[1:]

    print('Assembling the following model types: %s' % \
          ', '.join(assemble_models))
    print('##############')

    outf = 'output/'
    data = process_data.read_data(process_data.data_file)
    data_genes = process_data.get_all_gene_names(data)
    reassemble = False
    if not reassemble:
        stmts = ac.load_statements(pjoin(outf, 'preassembled.pkl'))
    else:
        #prior_stmts = build_prior(data_genes, pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.map_grounding(prior_stmts,
                                       save=pjoin(outf, 'gmapped_prior.pkl'))
        reach_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl'))
        reach_stmts = ac.filter_no_hypothesis(reach_stmts)
        #extra_stmts = ac.load_statements(pjoin(outf, 'extra_stmts.pkl'))
        extra_stmts = read_extra_sources(pjoin(outf, 'extra_stmts.pkl'))
        reading_stmts = reach_stmts + extra_stmts
        reading_stmts = ac.map_grounding(reading_stmts,
                                        save=pjoin(outf, 'gmapped_reading.pkl'))
        stmts = prior_stmts + reading_stmts + extra_stmts

        stmts = ac.filter_grounded_only(stmts)
        stmts = ac.filter_genes_only(stmts, specific_only=False)
        stmts = ac.filter_human_only(stmts)
        stmts = ac.expand_families(stmts)
        stmts = ac.filter_gene_list(stmts, data_genes, 'one')
github sorgerlab / indra / models / phase3_eval / assemble_pysb.py View on Github external
def preprocess_stmts(stmts, data_genes):
    """Filter and simplify INDRA Statements before they go into the model.

    The visible part of this function runs ``stmts`` through a fixed
    sequence of ``ac`` (assemble_corpus) filters, reduces activities and
    modifications with a ``MechLinker``, and then runs preassembly on the
    ``ActiveForm`` statements only before recombining them with the rest.

    NOTE(review): this excerpt ends without a ``return`` statement; the
    function most likely continues beyond what is shown here -- confirm
    against the full source before relying on its return value.
    """
    # Filter the INDRA Statements to be put into the model
    # presumably the dict lists mutations (BRAF V600E) and the list names
    # deleted genes (PTEN) -- confirm against ac.filter_mutation_status
    stmts = ac.filter_mutation_status(stmts,
                                      {'BRAF': [('V', '600', 'E')]}, ['PTEN'])
    # invert=True: presumably drops Complex statements instead of keeping
    # only them -- confirm against ac.filter_by_type
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_direct(stmts)
    # Belief cutoff hard-coded to 0.95
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    # 'all' is the gene-list policy argument; note that other call sites of
    # ac.filter_gene_list pass 'one' instead
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.filter_enzyme_kinase(stmts)
    stmts = ac.filter_mod_nokinase(stmts)
    stmts = ac.filter_transcription_factor(stmts)
    # Simplify activity types
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.reduce_activities()
    ml.gather_modifications()
    ml.reduce_modifications()
    # Split the MechLinker's statements into ActiveForm statements and
    # everything else, so preassembly is applied to the ActiveForm ones only
    af_stmts = ac.filter_by_type(ml.statements, ActiveForm)
    non_af_stmts = ac.filter_by_type(ml.statements, ActiveForm, invert=True)
    af_stmts = ac.run_preassembly(af_stmts)
    # Recombine the preassembled ActiveForm statements with the rest
    stmts = af_stmts + non_af_stmts
github sorgerlab / indra / indra / tools / incremental_model.py View on Github external
stmts = ac.filter_no_hypothesis(stmts)

        # Fix grounding
        if grounding_map is not None:
            stmts = ac.map_grounding(stmts, grounding_map=grounding_map)
        else:
            stmts = ac.map_grounding(stmts)

        if filters and ('grounding' in filters):
            stmts = ac.filter_grounded_only(stmts)

        # Fix sites
        stmts = ac.map_sequence(stmts)

        if filters and 'human_only' in filters:
            stmts = ac.filter_human_only(stmts)

        # Run preassembly
        stmts = ac.run_preassembly(stmts, return_toplevel=False)

        # Run relevance filter
        stmts = self._relevance_filter(stmts, filters)

        # Save Statements
        self.assembled_stmts = stmts
github sorgerlab / indra / models / phase3_eval / assemble_models.py View on Github external
stmts = ac.load_statements(pjoin(outf, 'preassembled.pkl'))
    else:
        #prior_stmts = build_prior(data_genes, pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.map_grounding(prior_stmts,
                                       save=pjoin(outf, 'gmapped_prior.pkl'))
        reach_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl'))
        reach_stmts = ac.filter_no_hypothesis(reach_stmts)
        #extra_stmts = ac.load_statements(pjoin(outf, 'extra_stmts.pkl'))
        extra_stmts = read_extra_sources(pjoin(outf, 'extra_stmts.pkl'))
        reading_stmts = reach_stmts + extra_stmts
        reading_stmts = ac.map_grounding(reading_stmts,
                                        save=pjoin(outf, 'gmapped_reading.pkl'))
        stmts = prior_stmts + reading_stmts + extra_stmts

        stmts = ac.filter_grounded_only(stmts)
        stmts = ac.filter_genes_only(stmts, specific_only=False)
        stmts = ac.filter_human_only(stmts)
        stmts = ac.expand_families(stmts)
        stmts = ac.filter_gene_list(stmts, data_genes, 'one')
        stmts = ac.map_sequence(stmts, save=pjoin(outf, 'smapped.pkl'))
        #stmts = ac.load_statements(pjoin(outf, 'smapped.pkl'))
        stmts = ac.run_preassembly(stmts, return_toplevel=False,
                                   save=pjoin(outf, 'preassembled.pkl'),
                                   poolsize=4)
    # Old pickle files can have missing Statement UUIDs, to avoid propagating
    # this, we set UUIDs here
    for stmt in stmts:
        try:
            stmt.uuid
        except:
            stmt.uuid = str(uuid.uuid4())
github sorgerlab / indra / indra / pre_assemble_for_db / pre_assemble_script.py View on Github external
def process_statements(stmts, num_procs=1):
    """Run grounding mapping, sequence mapping and preassembly in order.

    ``stmts`` is passed through ``ac.map_grounding`` and then
    ``ac.map_sequence``; the result is handed to ``ac.run_preassembly``
    with ``return_toplevel=False`` and ``poolsize=num_procs``. Whatever
    ``ac.run_preassembly`` returns is returned to the caller.
    """
    grounded = ac.map_grounding(stmts)
    site_mapped = ac.map_sequence(grounded)
    preassembly_options = {'return_toplevel': False, 'poolsize': num_procs}
    return ac.run_preassembly(site_mapped, **preassembly_options)
github sorgerlab / indra / rest_api / api.py View on Github external
def filter_grounded_only():
    """Filter to grounded Statements only.

    Reads a UTF-8 JSON document from the request body, using its
    'statements' entry as the Statements to filter and its optional
    'score_threshold' entry (converted to float when present) as the
    threshold passed to ``ac.filter_grounded_only``. An OPTIONS request is
    answered with an empty dict without reading the body.
    """
    if request.method == 'OPTIONS':
        return {}

    payload = json.loads(request.body.read().decode('utf-8'))
    serialized_stmts = payload.get('statements')
    threshold = payload.get('score_threshold')
    # Only coerce when a threshold was actually supplied; None is passed
    # through unchanged.
    threshold = float(threshold) if threshold is not None else None

    grounded = ac.filter_grounded_only(stmts_from_json(serialized_stmts),
                                       score_threshold=threshold)
    return _return_stmts(grounded)
github sorgerlab / indra / models / ndex_networks / palb2 / assemble_model.py View on Github external
def filter(stmts, cutoff, filename):
    """Apply the belief, top-level and direct filters, then save the result.

    ``ac.filter_belief`` is called with ``cutoff``, followed by
    ``ac.filter_top_level`` and ``ac.filter_direct``. The filtered
    statements are dumped to ``filename`` via ``ac.dump_statements`` and
    returned.
    """
    # The enzyme/kinase filter (ac.filter_enzyme_kinase) is left out of the
    # chain, matching the disabled step in the pipeline.
    selected = ac.filter_direct(
        ac.filter_top_level(
            ac.filter_belief(stmts, cutoff)
        )
    )
    ac.dump_statements(selected, filename)
    return selected
github sorgerlab / indra / models / fallahi_eval / sources / process_databases.py View on Github external
gn = GeneNetwork(gene_names, basen)
    # Read BEL Statements
    bel_stmts = gn.get_bel_stmts(filter=False)
    ac.dump_statements(bel_stmts, prefixed_pkl('bel'))
    # Read Pathway Commons Statements
    database_filter = ['reactome', 'kegg', 'pid']
    biopax_stmts = gn.get_biopax_stmts(database_filter=database_filter)
    # Eliminate blacklisted interactions
    tmp_stmts = []
    for stmt in biopax_stmts:
        source_ids = [ev.source_id for ev in stmt.evidence]
        if set(source_ids) & set(biopax_blacklist):
            continue
        tmp_stmts.append(stmt)
    biopax_stmts = tmp_stmts
    ac.dump_statements(biopax_stmts, prefixed_pkl('biopax'))
    # Read Phosphosite Statements
    phosphosite_stmts = read_phosphosite_owl(phosphosite_owl_file)
    ac.dump_statements(phosphosite_stmts, prefixed_pkl('phosphosite'))