How to use the clustermap function in seaborn

To help you get started, we’ve selected a few seaborn.clustermap examples, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github getcontacts / getcontacts / get_contact_fingerprints.py View on Github external
import seaborn as sns; 
    sns.set(color_codes=True)
    sns.set(font_scale=1.5)

    freq_matrix = np.array([freq_table[(r1, r2)] for (r1, r2) in freq_table])
    row_labels = [r1 + " - " + r2 for (r1, r2) in freq_table]
    pdframe = pd.DataFrame(freq_matrix, index=row_labels, columns=col_labels)

    # Scale down figsize if too large
    figsize = [pdframe.shape[1], pdframe.shape[0]]
    if figsize[1] > 320:
        figsize[0] *= 320 / figsize[1]
        figsize[1] *= 320 / figsize[1]

    # Create clustermap
    fingerprints = sns.clustermap(pdframe,
                                  figsize=figsize,
                                  annot=False,
                                  col_cluster=cluster_columns,
                                  linewidths=0.5,
                                  linecolor='black',
                                  cmap='Greens')

    # Remove color bar
    # fingerprints.cax.set_visible(False)

    import matplotlib.pyplot as plt
    plt.setp(fingerprints.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    plt.setp(fingerprints.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)

    fingerprints.savefig(out_file)
github davek44 / Basset / src / basset_motifs.py View on Github external
filter_seqs = np.transpose(filter_seqs)

    if drop_dead:
        filter_stds = filter_seqs.std(axis=1)
        filter_seqs = filter_seqs[filter_stds > 0]

    # downsample sequences
    seqs_i = np.random.randint(0, filter_seqs.shape[1], 500)

    hmin = np.percentile(filter_seqs[:,seqs_i], 0.1)
    hmax = np.percentile(filter_seqs[:,seqs_i], 99.9)

    sns.set(font_scale=0.3)

    plt.figure()
    sns.clustermap(filter_seqs[:,seqs_i], row_cluster=True, col_cluster=True, linewidths=0, xticklabels=False, vmin=hmin, vmax=hmax)
    plt.savefig(out_pdf)
    #out_png = out_pdf[:-2] + 'ng'
    #plt.savefig(out_png, dpi=300)
    plt.close()
github MrOlm / drep / build / lib / drep / d_analyze.py View on Github external
def plot_ANIn_cov_heatmap(Ndb):
    """Draw one alignment-coverage clustermap per MASH cluster.

    For each distinct value of ``Ndb['MASH_cluster']`` the matching rows are
    pivoted into a ``reference`` x ``querry`` table of ``alignment_coverage``
    values and passed to ``sns.clustermap`` using the module-level ``METHOD``
    as the linkage method.

    Clusters with fewer than two distinct ``reference`` values are skipped.

    Args:
        Ndb: pandas DataFrame with the columns ``MASH_cluster``,
            ``reference``, ``querry`` and ``alignment_coverage``.

    Returns:
        list: the objects returned by ``sns.clustermap``, one per plotted
        cluster, in the order the clusters are yielded by ``unique()``.
    """
    gs = []
    for Mcluster in Ndb['MASH_cluster'].unique():
        db = Ndb[Ndb['MASH_cluster'] == Mcluster]
        # "< 2" rather than "== 1": a NaN cluster label never compares equal
        # to itself, so it selects zero rows and must be skipped as well.
        if len(db['reference'].unique()) < 2:
            continue
        # Keyword arguments are required here: pandas >= 2.0 rejects
        # positional index/columns/values in DataFrame.pivot.
        d = db.pivot(index="reference", columns="querry",
                     values="alignment_coverage")
        g = sns.clustermap(d, method=METHOD)
        g.fig.suptitle("MASH cluster {0} - Alignment Coverage".format(Mcluster))
        # Keep the row labels horizontal.
        plt.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
        gs.append(g)
    return gs
github afrendeiro / toolkit / toolkit / atacseq.py View on Github external
# LOLA
        # take top n per PC
        import string
        lola_enrichments["set_id"] = lola_enrichments[
            ["collection", "description", "cellType", "tissue", "antibody", "treatment"]].astype(str).apply(string.join, axis=1)

        top = lola_enrichments.set_index('set_id').groupby("PC")['pValueLog'].nlargest(50)
        top_ids = top.index.get_level_values('set_id').unique()

        pivot = pd.pivot_table(
            lola_enrichments,
            index="set_id", columns="PC", values="pValueLog").fillna(0)
        pivot.index = pivot.index.str.replace(" nan", "").str.replace("blueprint blueprint", "blueprint").str.replace("None", "")
        top_ids = top_ids.str.replace(" nan", "").str.replace("blueprint blueprint", "blueprint").str.replace("None", "")

        g = sns.clustermap(
            pivot.ix[top_ids],
            cbar_kws={"label": "Enrichment: -log10(p-value)"}, cmap="Spectral_r",
            col_cluster=True)
        for tick in g.ax_heatmap.get_xticklabels():
            tick.set_rotation(90)
        for tick in g.ax_heatmap.get_yticklabels():
            tick.set_rotation(0)
        g.fig.savefig(os.path.join(self.results_dir, "PCA.PC_pvalues.lola_enrichments.svg"), bbox_inches="tight", dpi=300)

        g = sns.clustermap(
            pivot.ix[top_ids],
            cbar_kws={"label": "Enrichment: p-value z-score"},
            col_cluster=True, z_score=0)
        for tick in g.ax_heatmap.get_xticklabels():
            tick.set_rotation(90)
        for tick in g.ax_heatmap.get_yticklabels():
github audreyqyfu / LATE / scripts / weight_clustmap.py View on Github external
# A dimension of size 1 means a vector rather than a matrix
# (original note: this excludes saved bias files).
if m == 1 or n == 1:
    raise Exception('Not matrix, but vector, so skipped')

# Show the top-left 2x2 corner and the overall shape.
print('matrix sample', arr[:2, :2])
print('matrix shape:', arr.shape)

# Cap each dimension; larger matrices are randomly sub-sampled.
m_max = n_max = 1000
if m > m_max or n > n_max:
    print('matrix too large, down-sample to 1000 max each dim')
    arr = random_subset_arr(arr, m_max, n_max)

# seaborn clustering (rows stay rows and columns stay columns in clustermap)
heatmap = sns.clustermap(arr, method='average', cmap="summer", robust=True)
heatmap.savefig('.'.join([in_name, tag, 'png']), bbox_inches='tight')
github afrendeiro / toolkit / ngs_toolkit / atacseq.py View on Github external
ratio = m.loc[sex_chroms[1]] - m.loc[sex_chroms[0]]
        ratio.name = "{}_to_{}_ratio".format(sex_chroms[1], sex_chroms[0])
        ratio.to_csv(
            os.path.join(output_dir, self.name + "." + output_prefix + ".csv"), header=True
        )

        if plot:
            ratio.sort_values(inplace=True)
            m = m.reindex(ratio.index, axis=1)

            # Clustermap
            if isinstance(ratio.index, pd.MultiIndex):
                cols = m.columns.get_level_values("sample_name")
            else:
                cols = m.columns
            grid = sns.clustermap(
                m.T,
                z_score=1,
                center=0,
                cmap="RdBu_r",
                figsize=(m.shape[0] * 0.3, m.shape[1] * 0.3),
                row_cluster=False,
                col_cluster=False,
                cbar_kws={"label": "Deviation from mean\nchromosome accessibility"},
                yticklabels=cols,
            )
            grid.ax_heatmap.set_xlabel("Chromosomes")
            grid.ax_heatmap.set_ylabel("Samples")
            savefig(
                grid, os.path.join(output_dir, self.name + "." + output_prefix + ".clustermap.svg")
            )
github dmnfarrell / pandastable / pandastable / plugins / mdanalysis.py View on Github external
def cluster_map(data, names):
    """Plot a clustered heatmap of the log-transformed rows named in *names*.

    Args:
        data: pandas object indexed by row label (presumably a DataFrame;
            confirm against callers).
        names: row labels to select from ``data``.

    Returns:
        The object returned by ``seaborn.clustermap``.

    Raises:
        KeyError: if a label in ``names`` is not in ``data``'s index (this is
            how ``.loc`` treats missing labels in pandas >= 1.0).
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    # DataFrame.ix was removed in pandas 1.0; .loc is the label-based
    # replacement.
    data = data.loc[names]
    # Natural log of every value; NaN results are replaced by 0.
    X = np.log(data).fillna(0)
    cg = sns.clustermap(X, cmap='RdYlBu', figsize=(8, 9), lw=1,
                        linecolor='gray')
    # Horizontal row labels, vertical column labels.
    plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    plt.setp(cg.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
    # Pull the right edge of the axes in to 75% of the figure width
    # (presumably to leave room for the row labels).
    cg.fig.subplots_adjust(right=.75)
    return cg
github audreyqyfu / LATE / scripts / trash / step1.mse.mtl.py View on Github external
def save_bottle_neck_representation():
    """Evaluate ``e_a1`` on ``df.values``, save the result, and plot it.

    Runs ``e_a1`` in the module-level session with both ``pIn_holder`` and
    ``pHidden_holder`` fed as 1, writes the resulting array to
    ``pre_train/code_neck_valid.npy`` and saves a seaborn clustermap of it to
    ``./plots/bottle_neck.hclust.png``.
    """
    print("> save bottle-neck_representation")
    # todo: change variable name for each model
    feed = {X: df.values, pIn_holder: 1, pHidden_holder: 1}
    bottleneck_codes = sess.run(e_a1, feed_dict=feed)
    np.save('pre_train/code_neck_valid.npy', bottleneck_codes)
    # todo: hclust, but seaborn not on server yet
    sns.clustermap(bottleneck_codes).savefig('./plots/bottle_neck.hclust.png')
github ATOMconsortium / AMPL / atomsci / ddm / pipeline / diversity_plots.py View on Github external
if out_dir is not None:
            dist_df.to_csv('%s/%s_mcs_dist_table.csv' % (out_dir, file_prefix), index=False)
            for k in range(10):
                mol_i = base_mols[dist_df.i.values[k]]
                mol_j = base_mols[dist_df.j.values[k]]
                img_file_i = '%s/%d_%s.png' % (out_dir, k, compound_ids[dist_df.i.values[k]])
                img_file_j = '%s/%d_%s.png' % (out_dir, k, compound_ids[dist_df.j.values[k]])
                Draw.MolToFile(mol_i, img_file_i, size=(500,500), fitImage=False)
                Draw.MolToFile(mol_j, img_file_j, size=(500,500), fitImage=False)
    
        mcs_linkage = linkage(mcs_dist, method='complete')
        mcs_df = pd.DataFrame(mcs_dist, columns=compound_ids, index=compound_ids)
        if out_dir is not None:
            pdf_path = '%s/%s_mcs_clustermap.pdf' % (out_dir, file_prefix)
            pdf = PdfPages(pdf_path)
        g = sns.clustermap(mcs_df, row_linkage=mcs_linkage, col_linkage=mcs_linkage, figsize=(12,12), cmap='plasma')
        if out_dir is not None:
            pdf.savefig(g.fig)
            pdf.close()
    
        # Draw a UMAP projection based on MCS distance
        mapper = umap.UMAP(n_neighbors=20, min_dist=0.1, n_components=2, metric='precomputed', random_state=17)
        reps = mapper.fit_transform(mcs_dist)
        rep_df = pd.DataFrame.from_records(reps, columns=['x', 'y'])
        rep_df['compound_id'] = compound_ids
        if out_dir is not None:
            pdf_path = '%s/%s_mcs_umap_proj.pdf' % (out_dir, file_prefix)
            pdf = PdfPages(pdf_path)
        fig, ax = plt.subplots(figsize=(12,12))
        if responses is None:
            sns.scatterplot(x='x', y='y', data=rep_df, ax=ax)
        else:
github theislab / scanpy / scanpy / plotting / _anndata.py View on Github external
sanitize_anndata(adata)
    if use_raw is None and adata.raw is not None:
        use_raw = True
    X = adata.raw.X if use_raw else adata.X
    if issparse(X):
        X = X.toarray()
    df = pd.DataFrame(X, index=adata.obs_names, columns=adata.var_names)
    if obs_keys is not None:
        row_colors = adata.obs[obs_keys]
        _utils.add_colors_for_categorical_sample_annotation(adata, obs_keys)
        # do this more efficiently... just a quick solution
        lut = dict(
            zip(row_colors.cat.categories, adata.uns[obs_keys + '_colors'])
        )
        row_colors = adata.obs[obs_keys].map(lut)
        g = sns.clustermap(df, row_colors=row_colors.values, **kwds)
    else:
        g = sns.clustermap(df, **kwds)
    show = settings.autoshow if show is None else show
    _utils.savefig_or_show('clustermap', show=show, save=save)
    if show:
        pl.show()
    else:
        return g