How to use the goatools.utils.get_b2aset function in goatools

To help you get started, we’ve selected a few goatools examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tanghaibao / goatools / tests / test_yang_fig1b.py View on Github external
name2go['B']:  1,
        name2go['C']: 10,
        name2go['D']: 10,
        name2go['E']: 10,
        name2go['F']: 10,
        name2go['G']: 10,
        name2go['H']: 10,
        name2go['I']: 18,
    }
    go2genes = cx.defaultdict(set)
    genenum = 0
    for goid, qty in id2num.items():
        for _ in range(qty):
            go2genes[goid].add(genenum)
            genenum += 1
    id2gos = get_b2aset(go2genes)
    IdToGosReader.wr_id2gos(file_id2gos, id2gos)
    return id2gos
github tanghaibao / goatools / tests / test_assc_stats.py View on Github external
def describe_assc(org, fin_assc, go2obj, obj, prt):
    """Summarize GO-per-gene and gene-per-GO counts for one association file.

    Args:
        org: Label substituted into the two row titles passed to
            ``obj.prt_data`` (e.g. ``"hsa"`` in the sample table below).
        fin_assc: Association file, forwarded to ``dnld_assc``.
        go2obj: Forwarded to ``dnld_assc`` together with ``fin_assc``.
        obj: Object providing ``prt_data(title, counts, prt)``.
        prt: Forwarded unchanged to ``obj.prt_data``.

    Raises:
        AssertionError: If either the loaded association or the mapping
            derived from it by ``get_b2aset`` is empty.
    """
    # Sample of the kind of table this feeds:
    #
    # Assc.       | # Assc| range      | 25th | median | 75th | mean | stddev
    # ------------|-------|------------|------|--------|------|------|-------
    # hsa GO/gene | 19394 | 1 to   212 |    5 |      9 |   17 |   13 |     14
    # hsa gene/GO | 17277 | 1 to 8,897 |    1 |      3 |    8 |   15 |    120
    #
    # mus GO/gene | 19870 | 1 to   261 |    5 |     10 |   18 |   14 |     15
    # mus gene/GO | 17491 | 1 to 7,009 |    1 |      3 |    8 |   16 |    129
    #
    # dme GO/gene | 12551 | 1 to   137 |    2 |      4 |    8 |    6 |      7
    # dme gene/GO |  7878 | 1 to 1,675 |    1 |      3 |    7 |   10 |     41

    # The loader's own printing is silenced (prt=None); only the summary
    # rows below go to the caller's `prt`.
    assc_by_gene = dnld_assc(fin_assc, go2obj, prt=None)
    # presumably the reverse mapping (GO ID -> genes); verify against get_b2aset
    assc_by_go = get_b2aset(assc_by_gene)
    assert assc_by_gene
    assert assc_by_go

    # Size of every value set in each mapping.
    gos_per_gene = list(map(len, assc_by_gene.values()))
    genes_per_go = list(map(len, assc_by_go.values()))

    title_gos_per_gene = "{ORG} GO/gene".format(ORG=org)
    obj.prt_data(title_gos_per_gene, gos_per_gene, prt)
    title_genes_per_go = "{ORG} gene/GO".format(ORG=org)
    obj.prt_data(title_genes_per_go, genes_per_go, prt)
github tanghaibao / goatools / tests / test_yang_fig1a.py View on Github external
name2go['E']: 10,
        name2go['F']: 10,
        name2go['G']: 10,
        name2go['H']: 10,
        name2go['I']: 30,
        name2go['L']: 30,
        name2go['M']: 20,
        name2go['N']: 30,
    }
    go2genes = cx.defaultdict(set)
    genenum = 0
    for goid, qty in id2num.items():
        for _ in range(qty):
            go2genes[goid].add(genenum)
            genenum += 1
    id2gos = get_b2aset(go2genes)
    IdToGosReader.wr_id2gos(file_id2gos, id2gos)
    return id2gos
github tanghaibao / goatools / tests / test_yang_fig2a.py View on Github external
def _get_id2gos(file_id2gos, godag, name2go, name2num):
    """Return the annotations, loading them from file or creating the file.

    If ``file_id2gos`` already exists, its 'CC' annotations are read with
    ``IdToGosReader`` and returned. Otherwise integer gene IDs are generated,
    ``name2num[name]`` of them for the GO ID ``name2go[name]``, the resulting
    GO-to-genes mapping is passed through ``get_b2aset`` (presumably inverting
    it to gene-to-GOs; verify against that helper), written to
    ``file_id2gos`` and returned.

    Args:
        file_id2gos: Path of the annotation file to read or write.
        godag: Passed to ``IdToGosReader`` when reading an existing file.
        name2go: Maps a name to its GO ID.
        name2num: Maps a name to the number of genes to generate for it.
    """
    if not os.path.exists(file_id2gos):
        genes_by_go = cx.defaultdict(set)
        next_gene = 0  # first gene ID not yet handed out
        for go_name, num_genes in name2num.items():
            go_id = name2go[go_name]
            # Hand out `num_genes` consecutive IDs; the range bounds are fixed
            # before the loop runs, so advancing `next_gene` inside is safe.
            for gene in range(next_gene, next_gene + num_genes):
                genes_by_go[go_id].add(gene)
                next_gene = gene + 1
        annotations = get_b2aset(genes_by_go)
        IdToGosReader.wr_id2gos(file_id2gos, annotations)
        return annotations
    reader = IdToGosReader(file_id2gos, godag=godag)
    return reader.get_id2gos('CC')
github tanghaibao / goatools / goatools / associations.py View on Github external
def get_assc_pruned(assc_geneid2gos, min_genecnt=None, max_genecnt=None, prt=sys.stdout):
    """Remove GO IDs associated with large numbers of genes. Used in stochastic simulations."""
    # DEFN WAS: get_assc_pruned(assc_geneid2gos, max_genecnt=None, prt=sys.stdout):
    #      ADDED min_genecnt argument and functionality
    if max_genecnt is None and min_genecnt is None:
        return assc_geneid2gos, set()
    go2genes_orig = utils_get_b2aset(assc_geneid2gos)
    # go2genes_prun = {go:gs for go, gs in go2genes_orig.items() if len(gs) <= max_genecnt}
    go2genes_prun = {}
    for goid, genes in go2genes_orig.items():
        num_genes = len(genes)
        if (min_genecnt is None or num_genes >= min_genecnt) and \
           (max_genecnt is None or num_genes <= max_genecnt):
            go2genes_prun[goid] = genes
    num_was = len(go2genes_orig)
    num_now = len(go2genes_prun)
    gos_rm = set(go2genes_orig.keys()).difference(set(go2genes_prun.keys()))
    assert num_was-num_now == len(gos_rm)
    if prt is not None:
        if min_genecnt is None:
            min_genecnt = 1
        if max_genecnt is None:
            max_genecnt = "Max"
github tanghaibao / goatools / goatools / grouper / aart_geneproducts_one.py View on Github external
def get_go2chrs(sec2gos, sec2chr):
        """Return a dict mapping each GO ID to the set of characters for its sections.

        ``sec2gos`` is passed through ``get_b2aset`` to obtain, for each GO ID,
        the sections it belongs to; each section is then translated to its
        character via ``sec2chr``.
        """
        sections_by_go = get_b2aset(sec2gos)
        return {
            goid: {sec2chr[section] for section in sections}
            for goid, sections in sections_by_go.items()
        }
github tanghaibao / goatools / goatools / associations.py View on Github external
num_genes = len(genes)
        if (min_genecnt is None or num_genes >= min_genecnt) and \
           (max_genecnt is None or num_genes <= max_genecnt):
            go2genes_prun[goid] = genes
    num_was = len(go2genes_orig)
    num_now = len(go2genes_prun)
    gos_rm = set(go2genes_orig.keys()).difference(set(go2genes_prun.keys()))
    assert num_was-num_now == len(gos_rm)
    if prt is not None:
        if min_genecnt is None:
            min_genecnt = 1
        if max_genecnt is None:
            max_genecnt = "Max"
        prt.write("{N:4} GO IDs pruned. Kept {NOW} GOs assc w/({m} to {M} genes)\n".format(
            m=min_genecnt, M=max_genecnt, N=num_was-num_now, NOW=num_now))
    return utils_get_b2aset(go2genes_prun), gos_rm