How to use the statsmodels.sandbox.stats.multicomp.multipletests function in statsmodels

To help you get started, we’ve selected a few statsmodels examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ratschlab / RiboDiff / src / ribodiff / testcnt.py View on Github external
method = 'bonferroni'
    elif opts.multiTest == 'Holm':
        method = 'holm'
    elif opts.multiTest == 'Hochberg':
        method = 'simes-hochberg'
    elif opts.multiTest == 'Hommel':
        method = 'hommel'
    elif opts.multiTest == 'BY':
        method = 'fdr_by'
    elif opts.multiTest == 'TSBH':
        method = 'tsbh'
    else:
        sys.stderr.write('ERROR: The methods for multiple test correction can only accept \'Bonferroni\', \'Holm\', \'Hochberg\', \'Hommel\', \'BH\', \'BY\' or \'TSBH\' as its input.\n')
        sys.exit()

    mtc = sms.stats.multicomp.multipletests(pval[idx], alpha=0.1, method=method, returnsorted=False)

    padj = pval.copy()
    padj[idx] = mtc[1]
    data.padj = padj

    return data
github geronimp / enrichM / bin / enrichment.py View on Github external
def correct_multi_test(self, pvalues):

        mtc_dict = {'fdr_bh':'Benjamini/Hochberg (non-negative)'}

        logging.info('Applying multi-test correction using the %s method' % (mtc_dict[self.multi_test_correction]) )
        corrected_pvals \
            = sm.multipletests(pvalues,
                               alpha        = self.threshold,
                               method       = self.multi_test_correction,
                               returnsorted = False,
                               is_sorted    = False)[1]        
        return corrected_pvals
github CoBiG2 / RAD_Tools / vcftools_stats.py View on Github external
het_deficit = []
    het_excess = []

    with open(f) as fh:
        #Skip header
        next(fh)

        for line in fh:
            fields = line.strip().split()
            snp_pos.append((fields[0], fields[1]))
            pvals.append(float(fields[5]))
            het_deficit.append(float(fields[6]))
            het_excess.append(float(fields[7]))

    fdr_bool_list, fdr_pvalue_list, alpha_S, alpha_B = \
        multi_correction.multipletests(pvals, alpha=float(alpha),
                                       method="fdr_bh")

    snp_pvals = OrderedDict()
    for pos, pval in zip(snp_pos, fdr_pvalue_list):
        snp_pvals["-".join(pos)] = pval

    with open(vcf_file) as vcf_fh, open(vcf_outfile, "w") as ofh:
        for line in vcf_file:
            if line.startswith("#"):
                ofh.write(line)
            elif line.strip() != "":
                fields = line.split()
                # Check pval for locus
                pos = "-".join(fields[0], fields[1])
                if snp_pvals[pos] <= 0.05:
                    ofh.write(line)
github Xinglab / CLAM / CLAM / bak / CLAM.fdr_peak.MP.py View on Github external
continue
		else:
			lefter=0
			for j in range(int(h), max(rand_heights_dist)+1):
				lefter+=rand_heights_dist[j]
			height_to_pval[h]=lefter/float(rand_sum)
	pval_list=[]
	for i in obs_heights_count:
		if i<1:
			continue
		pval_list.append(height_to_pval[i])
	if len(pval_list)<=1:
		return []

	if correction_method==2:
		qval_list=multipletests(pval_list, method='fdr_bh')[1]
	else:
		qval_list=[min(x*(len(set([int(y) for y in height_to_pval if y!=0]))), 1.0) for x in pval_list]
	
	ind=0
	last_height=0
	for j in range(len(obs_heights_count)):
		this_height=obs_heights_count[j]
		if this_height<1:
			last_height=0
			continue
		if qval_list[ind] <= pval_cutoff:
			if this_height==last_height:
				chr, last_start, last_end, last_strand, last_height, last_qval=tid_to_qval[-1]
				tid_to_qval[-1]=[chr, last_start, tstart+j+1, strand, last_height, last_qval]
			else:
				tid_to_qval.append([chr, tstart+j, tstart+j+1, strand, obs_heights_count[j], qval_list[ind]])  # chr, start, end, strand, height, this_qval
github neurostuff / NiMARE / nimare / decode / discrete.py View on Github external
chi2_fi = one_way(n_selected_term, n_term)
    p_fi = special.chdtrc(1, chi2_fi)
    sign_fi = np.sign(n_selected_term - np.mean(n_selected_term)).ravel()  # pylint: disable=no-member

    # Two-way chi-square test for specificity of activation
    cells = np.array([[n_selected_term, n_selected_noterm],  # pylint: disable=no-member
                      [n_unselected_term, n_unselected_noterm]]).T
    chi2_ri = two_way(cells)
    p_ri = special.chdtrc(1, chi2_ri)
    sign_ri = np.sign(p_selected_g_term - p_selected_g_noterm).ravel()  # pylint: disable=no-member

    # Multiple comparisons correction across terms. Separately done for FI and RI.
    if correction is not None:
        _, p_corr_fi, _, _ = multipletests(p_fi, alpha=u, method=correction,
                                           returnsorted=False)
        _, p_corr_ri, _, _ = multipletests(p_ri, alpha=u, method=correction,
                                           returnsorted=False)
    else:
        p_corr_fi = p_fi
        p_corr_ri = p_ri

    # Compute z-values
    z_corr_fi = p_to_z(p_corr_fi, 'two') * sign_fi
    z_corr_ri = p_to_z(p_corr_ri, 'two') * sign_ri

    # Effect size
    # est. prob. of brain state described by term finding activation in ROI
    p_selected_g_term_g_prior = prior * p_selected_g_term + (1 - prior) * p_selected_g_noterm

    # est. prob. of activation in ROI reflecting brain state described by term
    p_term_g_selected_g_prior = p_selected_g_term * prior / p_selected_g_term_g_prior
github IBT-FMI / SAMRI / samri / plotting / summary.py View on Github external
brain_mask = path.abspath(path.expanduser(brain_mask))
	try:
		img = nib.load(p_file)
		brain_mask = nib.load(brain_mask)
	except (FileNotFoundError, nib.py3k.FileNotFoundError):
		return None,None,None,None,None
	data = img.get_data()
	brain_mask = brain_mask.get_data()
	header = img.header
	affine = img.affine
	shape = data.shape
	data = data.flatten()
	brain_mask = brain_mask.flatten()
	brain_mask = brain_mask.astype(bool)
	brain_data = data[brain_mask]
	reject, nonzero_data, _, _ = multipletests(brain_data, p_level, method="fdr_bh")
	brain_mask[brain_mask]=reject
	brain_mask = brain_mask.astype(int)
	mask = brain_mask.reshape(shape)
	mask_map = nib.Nifti1Image(mask, affine, header)
	masker = NiftiMasker(mask_img=mask_map)
	try:
		timecourse = masker.fit_transform(ts_file).T
		betas = masker.fit_transform(beta_file).T
	except ValueError:
		return None,None,None,None,None
	subplot_title = "\n ".join([str(substitution["subject"]),str(substitution["session"])])
	timecourse = np.mean(timecourse, axis=0)
	design = pd.read_csv(design_file, skiprows=5, sep="\t", header=None, index_col=False)
	design = design*np.mean(betas)
	event_df = pd.read_csv(event_file, sep="\t")
github afrendeiro / toolkit / ngs_toolkit / general.py View on Github external
# calculate p-value from Fisher's exact test
    intersections['a'] = total - intersections[['size1', 'size2', 'intersection']].sum(axis=1)
    intersections['b'] = intersections['size1'] - intersections['intersection']
    intersections['c'] = intersections['size2'] - intersections['intersection']
    intersections['d'] = intersections['intersection']

    for i, row in intersections[['d', 'b', 'c', 'a']].astype(int).iterrows():
        odds, p = fisher_exact(
            row
            .values
            .reshape((2, 2)),
            alternative="greater")
        intersections.loc[i, 'odds_ratio'] = odds
        intersections.loc[i, 'p_value'] = p
    # intersections['q_value'] = intersections['p_value'] * intersections.shape[0]
    intersections['q_value'] = multipletests(intersections['p_value'])[1]
    intersections['log_p_value'] = (-np.log10(intersections['p_value'])).fillna(0).replace(np.inf, 300)
    intersections['log_q_value'] = (-np.log10(intersections['q_value'])).fillna(0).replace(np.inf, 300)

    # save
    intersections.to_csv(os.path.join(output_dir, output_prefix + ".differential_overlap.csv"), index=False)
    intersections = pd.read_csv(os.path.join(output_dir, output_prefix + ".differential_overlap.csv"))

    for metric, label, description, fill_value in [
            ("intersection_max_perc", "percentage_overlap", "max of intersection %", 0),
            ("log_p_value", "significance", "p-value", 0)]:
        print(metric)
        # make pivot tables
        piv_up = pd.pivot_table(
            intersections[(intersections['dir1'] == "up") & (intersections['dir2'] == "up")],
            index="group1", columns="group2", values=metric).fillna(fill_value)
        piv_down = pd.pivot_table(
github ambrosejcarr / seqc / src / seqc / stats / resampled_nonparametric.py View on Github external
print('sampling %d cells (with replacement) per iteration' % n_cell)
        print('sampling %d molecules per cell' % v)

    with closing(Pool()) as pool:
        results = pool.map(sampling_function, repeat(norm_data, n_iter))

    results = np.stack(results)  # u, z, p

    ci = confidence_interval(results[:, :, 1])
    results = pd.DataFrame(
        data=np.concatenate([np.median(results, axis=0), ci], axis=1),
        index=labels,
        columns=['U', 'z_approx', 'p', 'z_lo', 'z_hi'])

    # add multiple-testing correction
    results['q'] = multipletests(results['p'], alpha=alpha, method='fdr_tsbh')[1]

    # remove low-value genes whose median sampling value is -inf
    neginf = np.isneginf(results['z_approx'])
    results.ix[neginf, 'z_lo'] = np.nan
    results.ix[neginf, 'z_approx'] = 0
    results.ix[neginf, ['p', 'q']] = 1.

    results = results[['U', 'z_approx', 'z_lo', 'z_hi', 'p', 'q']].sort_values('q')
    results.iloc[:, 1:4] = np.round(results.iloc[:, 1:4], 2)

    return results
github selective-inference / Python-software / docs / examples / power_comparison.py View on Github external
def BH(pvalues, active_var, s, q=0.2):
    decisions = multipletests(pvalues, alpha=q, method="fdr_bh")[0]
    TP = decisions[active_var].sum()
    FDP = np.true_divide(decisions.sum() - TP, max(decisions.sum(), 1))
    power = np.true_divide(TP, s)
    total_rejections = decisions.sum()
    false_rejections = total_rejections - TP
    return FDP, power, total_rejections, false_rejections
github limix / struct-lmm / struct_lmm / _fdr.py View on Github external
def fdr_bh(pv):
    import numpy as np

    from statsmodels.sandbox.stats.multicomp import multipletests

    return multipletests(np.asarray(pv), method="fdr_bh")[1]