How to use the hist.Hist function in hist

To help you get started, we’ve selected a few hist examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github psathyrella / partis / python / lbplotting.py View on Github external
def plot_bcr_phylo_target_attraction(plotdir, event):  # plots of which sequences are going toward which targets
    fig, ax = plotting.mpl_init()

    # affinity vs stuff:
    # xvals = [1. / af for line in mutated_events for af in line['affinities']]
    # yvals = [nm for line in mutated_events for nm in line['n_mutations']]

    # # min distance to target:
    # yvals = [hd for line in mutated_events for hd in get_min_target_hdists(line['input_seqs'], line['target_seqs'])]
    # ax.scatter(xvals, yvals, alpha=0.65)

    hist = Hist(len(event['target_seqs']), -0.5, len(event['target_seqs']) - 0.5, value_list=event['nearest_target_indices'])
    hist.mpl_plot(ax, alpha=0.7, ignore_overflows=True)

    plotname = 'nearest-target-identities'
    plotting.mpl_finish(ax, plotdir, plotname, xlabel='index (identity) of nearest target sequence', ylabel='counts') #, xbounds=(minfrac*xmin, maxfrac*xmax), ybounds=(-0.05, 1.05), log='x', xticks=xticks, xticklabels=[('%d' % x) for x in xticks], leg_loc=(0.8, 0.55 + 0.05*(4 - len(plotvals))), leg_title=leg_title, title=title)
github psathyrella / partis / python / compareutils.py View on Github external
elif jk2 in cachevals:
            return jk2
        else:
            return None

    hstyles = ['plain']  #, 'zoom']  # 'zoom-logy', 
    if metric == 'logprob':
        nbins, xmin, xmax = 40, -55, 70
    elif metric == 'naive_hfrac':
        nbins, xmin, xmax = 70, 0., 0.65
    else:
        assert False
    hists = OrderedDict()
    htypes = ['nearest-clones', 'farthest-clones', 'all-clones', 'not']
    for ht in htypes:
        hists[ht] = Hist(nbins, xmin, xmax)
    bigvals, smallvals = {}, {}
    for key_a, key_b in combinations:  #  is colon-separated string (not a list of keys)
        a_ids, b_ids = key_a.split(':'), key_b.split(':')
        # if not utils.from_same_event(reco_info, a_ids) or not utils.from_same_event(reco_info, b_ids):  # skip clusters that were erroneously merged -- i.e., in effect, assume the previous step didn't screw up at all
        #     raise Exception('woop')
        jk = get_joint_key(key_a, key_b)
        if jk is None:  # if we don't have the joint logprob cached
            continue

        if metric == 'logprob':
            # jk = get_joint_key(key_a, key_b)
            # if jk is None:  # if we don't have the joint logprob cached
            #     continue
            lratio = cachevals[jk] - cachevals[key_a] - cachevals[key_b]
            # print '%f - %f - %f = %f' % (cachevals[jk], cachevals[key_a], cachevals[key_b], lratio),
            mval = lratio
github psathyrella / partis / python / lbplotting.py View on Github external
def make_hist(plotvals, n_total, n_skipped, iclust=None, affinities=None):
        if len(plotvals) == 0:
            return
        hist = Hist(30, min(plotvals), max(plotvals), value_list=plotvals)
        fig, ax = plotting.mpl_init()
        hist.mpl_plot(ax) #, square_bins=True, errors=False)
        # fig.text(0.7, 0.8, 'mean %.3f' % numpy.mean(plotvals), fontsize=15)
        # fig.text(0.7, 0.75, 'max %.3f' % max(plotvals), fontsize=15)
        # if affinities is not None:
        #     fig.text(0.38, 0.88, 'mean/max affinity: %.4f/%.4f' % (numpy.mean(affinities), max(affinities)), fontsize=15)
        plotname = '%s-%s' % (lb_metric, str(iclust) if iclust is not None else 'all-clusters')
        leafskipstr = ', skipped %d leaves' % n_skipped if n_skipped > 0 else ''  # ok they're not necessarily leaves, but almost all of them are leaves (and not really sure how a non-leaf could get zero, but some of them seem to)
        title = '%s %s: %d entries%s (%s)' % ('true' if is_true_line else 'inferred', mtitlestr('per-seq', lb_metric, short=True), n_total, leafskipstr, 'all clusters' if iclust is None else 'iclust %d'%iclust)
        fn = plotting.mpl_finish(ax, plotdir, plotname, xlabel=mtitlestr('per-seq', lb_metric), log='y', ylabel='counts', title=title)
        if iclust is None or (iclust_fnames is not None and iclust < iclust_fnames):
            add_fn(fnames, fn=fn)
github psathyrella / partis / bin / compare-plotdirs.py View on Github external
def get_hists_from_dir(dirname, histname, string_to_ignore=None):
    hists = {}
    for fname in glob.glob(dirname + '/*.csv'):
        varname = os.path.basename(fname).replace('.csv', '')
        if string_to_ignore is not None:
            varname = varname.replace(string_to_ignore, '')
        hists[varname] = Hist(fname=fname, title=histname)
    if len(hists) == 0:
        print '    no csvs found in %s' % dirname
    return hists
github psathyrella / partis / python / plotting.py View on Github external
xmin, xmax = None, None
        hists, labels = [], []
        for ihist in range(len(numpyhists)):
            nphist = numpyhists[ihist]  # numpy.hist is two arrays: [0] is bin counts, [1] is bin x values (not sure if low, high, or centers)
            obs_time = ihist  #  + 1  # I *think* it's right without the 1 (although I guess it's really a little arbitrary)
            if not plot_this_time(obs_time, numpyhists):
                continue
            if nphist is None:  # time points at which we didn't sample
                hists.append(None)
                labels.append(None)
                continue
            bin_contents, bin_edges = nphist
            assert len(bin_contents) == len(bin_edges) - 1
            # print ' '.join('%5.1f' % c for c in bin_contents)
            # print ' '.join('%5.1f' % c for c in bin_edges)
            hist = Hist(len(bin_edges) - 1, bin_edges[0], bin_edges[-1])
            for ibin in range(len(bin_edges) - 1):  # nphist indexing, not Hist indexing
                lo_edge = bin_edges[ibin]
                hi_edge = bin_edges[ibin + 1]
                bin_center = (hi_edge + lo_edge) / 2.
                for _ in range(bin_contents[ibin]):
                    hist.fill(bin_center)
                    xmin = lo_edge if xmin is None else min(xmin, lo_edge)
                    xmax = hi_edge if xmax is None else max(xmax, hi_edge)
            hists.append(hist)
            labels.append('%d (%.1f)' % (obs_time, hist.get_mean()))

        # hists = [Hist(1, xmin, xmax) if h is None else h for h in hists]  # replace the None values with empty hists
        hists, labels = zip(*[(h, l) for h, l in zip(hists, labels) if h is not None])
        return hists, labels, xmin, xmax
github psathyrella / partis / python / allelefinder.py View on Github external
self.min_min_candidate_ratio_to_plot = 1.5  # don't plot positions that're below this (for all )

        self.n_warn_alleles_per_gene = 2

        self.default_slope_bounds = (-0.1, 1.)  # fitting function needs some reasonable bounds from which to start (I could at some point make slope part of the criteria for candidacy, but it wouldn't add much sensitivity)
        self.unbounded_y_icpt_bounds = (-1., 1.5)

        self.seq_info = {}

        self.counts, self.fitfos = {}, {}
        self.inferred_allele_info = []  # new alleles with respect to the template genes from which we actually inferred them, for usage internal to allelefinder
        self.new_allele_info = []  #  new alleles with respect to original template genes, for external use (distinction is important if we infer a new allele from another previously-inferred new allele)
        self.positions_to_plot = {}
        self.n_seqs_too_highly_mutated = {}  # sequences (per-gene) that had more than  mutations
        self.gene_obs_counts = {}  # NOTE same as n_clonal_representatives
        self.overall_mute_counts = Hist(self.args.n_max_mutations_per_segment - 1, 0.5, self.args.n_max_mutations_per_segment - 0.5)  # i.e. 0th (underflow) bin corresponds to zero mutations
        self.per_gene_mute_counts = {}  # crappy name -- this is the denominator for each position in . Which is usually the same for most positions, but it's cleaner to keep it separate than choose one of 'em.
        self.n_big_del_skipped = {s : {} for s in self.n_bases_to_exclude}

        self.n_fits = 0

        self.default_initial_glfo = None
        if self.args.default_initial_germline_dir is not None:  # if this is set, we want to take any new allele names from this directory's glfo if they're in there
            self.default_initial_glfo = glutils.read_glfo(self.args.default_initial_germline_dir, glfo['locus'])

        self.n_excluded_clonal_queries = {}
        self.n_clones = {}
        self.n_clonal_representatives = {}  # NOTE same as gene_obs_counts

        self.finalized = False

        self.reflengths = {}
github psathyrella / partis / python / allelefinder.py View on Github external
def init_gene(self, gene):
        self.counts[gene] = {}
        for igl in range(len(self.glfo['seqs'][utils.get_region(gene)][gene])):
            self.counts[gene][igl] = {}
            for istart in range(self.args.n_max_mutations_per_segment + 1):  # istart and n_mutes are equivalent
                self.counts[gene][igl][istart] = {n : 0 for n in ['muted', 'total'] + utils.nukes}
        self.gene_obs_counts[gene] = 0
        self.per_gene_mute_counts[gene] = Hist(self.args.n_max_mutations_per_segment - 1, 0.5, self.args.n_max_mutations_per_segment - 0.5)  # i.e. 0th (underflow) bin corresponds to zero mutations
        for side in self.n_big_del_skipped:
            self.n_big_del_skipped[side][gene] = 0
        self.n_seqs_too_highly_mutated[gene] = 0
        self.n_excluded_clonal_queries[gene] = 0
        self.n_clones[gene] = 0
        self.n_clonal_representatives[gene] = 0
github psathyrella / partis / python / treegenerator.py View on Github external
def get_mute_hist(self, mtype):
        if self.args.mutate_from_scratch:
            mean_mute_val = self.args.scratch_mute_freq
            if self.args.same_mute_freq_for_all_seqs:
                hist = Hist(1, mean_mute_val - utils.eps, mean_mute_val + utils.eps)
                hist.fill(mean_mute_val)
            else:
                n_entries = 500
                length_vals = [v for v in numpy.random.exponential(mean_mute_val, n_entries)]  # count doesn't work on numpy.ndarray objects
                max_val = 0.8  # this is arbitrary, but you shouldn't be calling this with anything that gets a significant number anywhere near there, anyway
                if length_vals.count(max_val):
                    print '%s lots of really high mutation rates treegenerator::get_mute_hist()' % utils.color('yellow', 'warning')
                length_vals = [min(v, max_val) for v in length_vals]
                hist = Hist(30, 0., max_val)
                for val in length_vals:
                    hist.fill(val)
                hist.normalize()
        else:
            hist = Hist(fname=self.parameter_dir + '/' + mtype + '-mean-mute-freqs.csv')

        return hist
github psathyrella / partis / bin / compare-plotdirs.py View on Github external
def compare_directories(args, plotdirlist, outdir):
    utils.prep_dir(outdir, wildlings=['*.png', '*.svg', '*.csv'])

    # read hists from 
    allhists = OrderedDict()
    allvars = set()  # all variables that appeared in any dir
    for idir in range(len(plotdirlist)):
        dirhists = get_hists_from_dir(plotdirlist[idir], args.names[idir])
        allvars |= set(dirhists.keys())
        allhists[args.names[idir]] = dirhists
    # then loop over all the s we found
    for varname in allvars:
        hlist = [allhists[dname].get(varname, Hist(1, 0, 1, title='null')) for dname in allhists]
        plot_single_variable(args, varname, hlist, outdir, pathnameclues=plotdirlist[0])

    plotting.make_html(outdir, n_columns=4)

hist

Hist classes and utilities

BSD-3-Clause
Latest version published 4 months ago

Package Health Score

78 / 100
Full package analysis