How to use the goatools.associations.read_associations function in goatools

To help you get started, we’ve selected a few goatools examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tanghaibao / goatools / tests / test_goea_rpt_bonferroni.py View on Github external
def run_bonferroni():
    """Do Gene Ontology Enrichment Analysis w/Bonferroni multipletest. Print results 3 ways."""
    # ---------------------------------------------------------------------
    # Run Gene Ontology Analysis (GOEA)
    #
    # 1. Initialize
    godag = get_godag(os.path.join(os.getcwd(), "go-basic.obo"), loading_bar=None)
    fin_assc = os.path.join(REPO, "data/association")
    assoc = read_associations(fin_assc, 'id2gos', no_top=True)
    popul_ids = [line.rstrip() for line in open(os.path.join(REPO, "data/population"))]
    study_ids = [line.rstrip() for line in open(os.path.join(REPO, "data/study"))]
    # 2. Run enrichment analysis
    goea = GOEnrichmentStudy(popul_ids, assoc, godag, alpha=0.05, methods=['bonferroni'])
    results_nt = goea.run_study(study_ids)
    return results_nt, goea
github tanghaibao / goatools / tests / test_goea_rpt_fdr_bh.py View on Github external
def test_fdr_bh(fout_log=None):
    """Do Gene Ontology Enrichment Analysis w/Benjamini-Hochberg multipletest. Print results"""
    # ---------------------------------------------------------------------
    # Run Gene Ontology Analysis (GOEA)
    #
    # 1. Initialize
    log = sys.stdout if fout_log is None else open(fout_log, 'w')
    obo_dag = GODag("go-basic.obo")
    assoc = read_associations("../data/association", no_top=True)
    popul_ids = [line.rstrip() for line in open("../data/population")]
    study_ids = [line.rstrip() for line in open("../data/study")]
    # 2. Run enrichment analysis
    goea = GOEA(obo_dag, assoc, log)
    goea.set_population(popul_ids)
    goea.set_params(alpha=0.05, method='fdr_bh')
    results_nt = goea.find_enrichment(study_ids)

    # ---------------------------------------------------------------------
    # Print results 3 ways: to screen, to tsv(tab-separated file), to xlsx(Excel spreadsheet)
    fout_tsv = "goea_fdr_bh.tsv"
    fout_xls = "goea_fdr_bh.xlsx"
   
    field_names = ['NS', 'study_cnt', 'fdr_bh', 'level', 'depth', 'GO', 'name', 'fdr_bh_sig'] # collect these
    print_names = ['NS', 'study_cnt', 'fdr_bh', 'level', 'depth', 'GO', 'name'] # print these in tsv and xlsx
    # Optional user customizable sort: 
github tanghaibao / goatools / tests / test_goea_errors.py View on Github external
def init_goea(**kws):
    """Initialize GODag and GOEnrichmentStudy."""
    godag = get_godag(os.path.join(os.getcwd(), "go-basic.obo"), loading_bar=None)
    fin_assc = ROOT + "association"
    assoc = read_associations(fin_assc, 'id2gos', no_top=True)
    popul_ids = [line.rstrip() for line in open(ROOT + "population")]
    methods = kws['methods'] if 'methods' in kws else ['not_bonferroni']
    study_ids = [line.rstrip() for line in open(ROOT + "study")]
    return GOEnrichmentStudy(popul_ids, assoc, godag, methods=methods), study_ids
github tanghaibao / goatools / tests / test_goea_rpt_various.py View on Github external
def init_goea(log):
    """Read Ontologies and Annotations once."""
    # ---------------------------------------------------------------------
    # Run Gene Ontology Analysis (GOEA)
    #
    # 1. Initialize
    obo_dag = GODag("go-basic.obo")
    assoc = read_associations("../data/association", no_top=True)
    popul_ids = [line.rstrip() for line in open("../data/population")]
    # 2. Run enrichment analysis
    goeaobj = GOEA(obo_dag, assoc, log)
    goeaobj.set_population(popul_ids)
    return goeaobj
github tanghaibao / goatools / tests / test_goea_local.py View on Github external
def get_goeaobj(methods=None):
    """Test GOEA with method, fdr."""
    obo_fin = os.path.join(REPO, "go-basic.obo")
    obo_dag = get_godag(obo_fin, loading_bar=None)
    fin_assc = "{REPO}/tests/data/small_association".format(REPO=REPO)
    assoc = read_associations(fin_assc, 'id2gos', no_top=True)
    popul_fin = "{REPO}/tests/data/small_population".format(REPO=REPO)
    popul_ids = [line.rstrip() for line in open(popul_fin)]
    goeaobj = GOEnrichmentStudy(popul_ids, assoc, obo_dag, methods=methods)
    return goeaobj
github tanghaibao / goatools / tests / test_combine_nt_lists.py View on Github external
def get_goea_results(method="fdr_bh"):
    """Get GOEA results."""
    root_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
    obo_fin = os.path.join(root_dir, "goslim_generic.obo")
    obo_dag = GODag(obo_fin)
    fin_slim = os.path.join(root_dir, "slim_association")
    assoc = read_associations(fin_slim, 'id2gos', no_top=True)
    popul_ids = [line.rstrip() for line in open(os.path.join(root_dir, "small_population"))]
    goeaobj = GOEnrichmentStudy(popul_ids, assoc, obo_dag, methods=[method])
    study_ids = [line.rstrip() for line in open(os.path.join(root_dir, "small_study"))]
    goea_results = goeaobj.run_study(study_ids, methods=[method])
    return goea_results
github tanghaibao / goatools / tests / test_goea_statsmodels.py View on Github external
def get_goeaobj(methods=None):
    """Test GOEA with method, fdr."""
    obo_dag = GODag(ROOT + "goslim_generic.obo")
    fin_assc = ROOT + "slim_association"
    assoc = read_associations(fin_assc, 'id2gos', no_top=True)
    popul_ids = [line.rstrip() for line in open(ROOT + "small_population")]
    goeaobj = GOEnrichmentStudy(popul_ids, assoc, obo_dag, methods=methods)
    return goeaobj
github tanghaibao / goatools / scripts / map_to_slim.py View on Github external
if opts.term not in go_dag:
            sys.stderr.write(("term %s not found!\n" % opts.term))
            sys.exit(1)
        direct_anc, all_anc = mapslim(opts.term, go_dag, goslim_dag)
        # output either all or only direct slims, depending on user command
        if only_direct:
            slim_terms_str = ";".join(direct_anc)
        else:
            slim_terms_str = ";".join(all_anc)
        print(slim_terms_str)

    # in case a association file is given as input
    if opts.ass_file_name:
        assert os.path.exists(opts.ass_file_name), ("file %s not found!"
                                                    % opts.ass_file_name)
        assocs = read_associations(opts.ass_file_name, 'id2gos')
        for protein_product, go_terms in assocs.items():
            all_direct_anc = set()
            all_covered_anc = set()
            all_all_anc = set()
            for go_term in go_terms:
                if go_term not in go_dag:
                    continue
                direct_anc, all_anc = mapslim(go_term, go_dag, goslim_dag)
                all_all_anc |= all_anc
                # collect all covered ancestors, so the direct ancestors
                # can be calculated afterwards
                all_covered_anc |= (all_anc - direct_anc)
            all_direct_anc = all_all_anc - all_covered_anc
            # output either all or only direct, depending on user command
            if only_direct:
                slim_terms_str = ";".join(all_direct_anc)