How to use the seaborn.pairplot function in seaborn

To help you get started, we’ve selected a few seaborn examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tengge1 / ShadowEditor / test / tensorflow / basic / basic_regression.py View on Github external
# Count the missing values in each column. The result is not stored, so it
# is only visible when this runs interactively.
dataset.isna().sum()

# Discard every row that contains a missing value.
dataset = dataset.dropna()

# Remove the 'Origin' column from the frame and replace it with one
# 0.0/1.0 indicator column for each of the codes 1, 2 and 3.
origin = dataset.pop('Origin')

dataset['USA'] = (origin == 1)*1.0
dataset['Europe'] = (origin == 2)*1.0
dataset['Japan'] = (origin == 3)*1.0
dataset.tail()

# Reproducible split: sample 80% of the rows with a fixed seed, then use the
# rows whose index was not sampled as the test set.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.drop(train_dataset.index)

# Pairwise plots of four columns, with a kernel density estimate on the
# diagonal.
sns.pairplot(
    train_dataset[["MPG", "Cylinders", "Displacement", "Weight"]], diag_kind="kde")


# Summary statistics of the training columns. 'MPG' is removed because it is
# popped off as the label below, and the table is transposed so that each
# row describes one column of the training data.
train_stats = train_dataset.describe()
train_stats.pop("MPG")
train_stats = train_stats.transpose()
train_stats

# Separate the 'MPG' label column from the remaining feature columns.
train_labels = train_dataset.pop('MPG')
test_labels = test_dataset.pop('MPG')


def norm(x):
  """Standardize ``x`` with the module-level ``train_stats`` table.

  Subtracts ``train_stats['mean']`` from ``x`` and divides the result by
  ``train_stats['std']``.
  """
  centered = x - train_stats['mean']
  return centered / train_stats['std']
github ScottfreeLLC / AlphaPy / alphapy / plots.py View on Github external
https://seaborn.pydata.org/examples/scatterplot_matrix.html

    """

    logger.info("Generating Scatter Plot")

    # Get the feature subset

    features.append(target)
    df = df[features]

    # Generate the pair plot

    sns.set()
    sns_plot = sns.pairplot(df, hue=target)

    # Save the plot
    write_plot('seaborn', sns_plot, 'scatter_plot', tag, directory)
github rtqichen / ffjord / diagnostics / scrap_log.py View on Github external
def plot_pairplot(csv_filename, fig_filename, top=None):
    """Draw a seaborn pair plot of a CSV file and save it.

    Args:
        csv_filename: Path of the CSV file, loaded with ``pandas.read_csv``.
        fig_filename: Destination handed to the resulting grid's ``savefig``.
        top: When not ``None``, only the rows selected by ``[:top]`` are
            plotted; otherwise every row is used.
    """
    import pandas as pd
    import seaborn as sns

    sns.set(style="ticks", color_codes=True)
    table = pd.read_csv(csv_filename)
    subset = table if top is None else table[:top]

    # Regression fits off the diagonal, kernel density estimates on it.
    grid = sns.pairplot(subset, kind='reg', diag_kind='kde', markers='.')
    grid.savefig(fig_filename)
github GaelVaroquaux / interpreting_ml_tuto / src / 01_how_well / 02_cross_validation.py View on Github external
filename)
exams = pd.read_csv('exams.csv.gz')

# Select data for students present all three years, i.e. keep only the
# StudentIDs that occur more than twice in the table.
continuing_students = exams.StudentID.value_counts()
continuing_students = continuing_students[continuing_students > 2].index
exams = exams[exams.StudentID.isin(continuing_students)]

###############################################################
# **Visualization**: Grades at tests in exams depend on socio-economic
# status, year at school, ...
#
# The simplest way to do this is using seaborn's pairplot function.

import seaborn as sns
# StudentID is only an identifier, so it is left out of the plot.
sns.pairplot(exams.drop(columns=['StudentID']))

###############################################################
# A more elaborate plot using density estimation gives better
# understanding of the dense regions:
g = sns.PairGrid(exams.drop(columns=['StudentID']),
                 diag_sharey=False)
# Density estimates below and on the diagonal, small scatter points above it.
g.map_lower(sns.kdeplot)
g.map_upper(plt.scatter, s=2)
g.map_diag(sns.kdeplot, lw=3)


###############################################################
# **Prediction**: Can we predict test grades in maths from demographics
# (ie, not from other grades)?

# A bit of feature engineering to get a numerical matrix (easily done
github amueller / dabl / dabl / plotting.py View on Github external
features = features.drop(target_col, axis=1)
    if features.shape[1] == 0:
        return
    top_for_interactions = 20
    features_imp = SimpleImputer().fit_transform(features)
    target = X[target_col]
    # FIXME if one class only has NaN for a value we crash! :-/
    # TODO univariate plot?
    # already on diagonal for pairplot but not for many features
    if features.shape[1] <= 5:
        # for n_dim <= 5 we do full pairplot plot
        # FIXME filling in missing values here b/c of a bug in seaborn
        # we really shouldn't be doing this
        # https://github.com/mwaskom/seaborn/issues/1699
        X_imp = X.fillna(features.median(axis=0))
        sns.pairplot(X_imp, vars=features.columns,
                     hue=target_col)
    else:
        # univariate plots
        show_top = _get_n_top(features, "continuous")
        f, p = f_classif(features_imp, target)
        top_k = np.argsort(f)[-show_top:][::-1]
        # FIXME this will fail if a feature is always
        # NaN for a particular class
        best_features = features.iloc[:, top_k].copy()

        best_features[target_col] = target
        df = best_features.melt(target_col)
        rows, cols = find_pretty_grid(show_top)
        g = sns.FacetGrid(df, col='variable', hue=target_col, col_wrap=cols,
                          sharey=False, sharex=False, hue_order=hue_order)
        g = g.map(sns.kdeplot, "value", shade=True)
github jeremyn / python-machine-learning-book / chapter_10.py View on Github external
def visualize_housing_data(df):
    """Plot pairwise relationships and a correlation heatmap for ``df``.

    Both plots are restricted to the columns 'LSTAT', 'INDUS', 'NOX', 'RM'
    and 'MEDV'. A seaborn pair plot is drawn and displayed with
    ``plt.show()``; afterwards an annotated heatmap of the correlation
    matrix computed by ``np.corrcoef`` is drawn.

    Args:
        df: Table that can be indexed with a list of column names and that
            contains the five columns above (presumably a pandas DataFrame;
            ``.values`` is read from the selection).
    """
    sns.set(style='whitegrid', context='notebook')
    cols = ['LSTAT', 'INDUS', 'NOX', 'RM', 'MEDV']

    # seaborn 0.9 renamed ``size`` to ``height``; the old keyword is
    # deprecated, so the current name is used here.
    sns.pairplot(df[cols], height=2.5)

    plt.show()

    # np.corrcoef treats each row as one variable, hence the transpose.
    correlation_matrix = np.corrcoef(df[cols].values.T)
    sns.set(font_scale=1.5)
    sns.heatmap(
        correlation_matrix,
        cbar=True,
        annot=True,
        square=True,
        fmt='.2f',
        annot_kws={'size': 15},
        yticklabels=cols,
        xticklabels=cols,
    )
github gionanide / Speech_Signal_Processing_and_Classification / classifiers / dimensionality_reduction / graph_spectral_analysis&spectral_clustering_default.py View on Github external
pC = (conf_matrix[1][1]/(conf_matrix[1][0]+conf_matrix[1][1]))*100

	#see the inside details of the classification
	print 'For class 0 man cases:',conf_matrix[0][0],'classified correctly and',conf_matrix[0][1],'missclassified,',hC,'accuracy \n'
	print 'For class 1 woman cases:',conf_matrix[1][1],'classified correctly and',conf_matrix[1][0],'missclassified,',pC,'accuracy\n'


	#plot the training features after the kpca and the lda procedure
	embedded_labels = pd.DataFrame({'Feature1': features_embedded[: ,0], 'Feature2': features_embedded[: ,1],'Label': labels})
	sns.pairplot(embedded_labels, hue='Label')
	#plt.savefig('kpca_trainset_parkinson_healthy.png')
	#plt.show()

	#plot the training features after the kpca and the lda procedure
	embedded_predicted_labels = pd.DataFrame({'Feature1': features_embedded[: ,0], 'Feature2': features_embedded[: ,1],'Label': predicted_labels})
	sns.pairplot(embedded_predicted_labels, hue='Label')
	#plt.savefig('kpca_trainset_parkinson_healthy.png')
	plt.show()
github probcomp / Venturecxx / examples / causal-inference / synth_data_generator.py View on Github external
df = pd.DataFrame({
        "a":a, 
        "b":b, 
        "c":c, 
        "d":d, 
        "e":e, 
        "f":f, 
        "g":g, 
        "h":h, 
        "i":i, 
        "j":j})
    df.to_csv("csv_files/causal_linear.csv", index=False)

    #Plotting
    sns.pairplot(data=pd.DataFrame({"a":a, "b":b}))
    plt.title("Data of Fig. 1, subplot (i)", fontsize=20, y=1.08, x=-0.2)
    sns.pairplot(data=pd.DataFrame({"c": c, "d":d}))
    plt.title("Data of Fig. 1, subplot(ii)", fontsize=20, y=1.08, x=-0.2)
    plt.figure()
    sns.pairplot(data=pd.DataFrame({"e":e, "f": f, "g":g}))
    plt.title("Data of Fig. 1, subplot (iii)", fontsize=20, y=2.28, x=-0.8)
    sns.pairplot(data=pd.DataFrame({"h":h, "i": i, "j":j}))
    plt.title("Data of Fig. 1, subplot (iv)", fontsize=20, y=2.28, x=-0.8)
github zmcddn / Data-Science-Helper / dshelper / plot / pair.py View on Github external
self.figure.clf()

        legend_labels = self.df[column_name].unique()
        legend_title = column_name

        df = prepare_data(self.df[self.available_columns])

        if str(self.df[column_name].dtype) == "object":
            # Update hue column for categorical data
            column_name += "_code"

        pub.sendMessage("LOG_MESSAGE", log_message="\nReady to plot...")

        try:
            # Produce pairplot using seaborn
            pair_plot = sns.pairplot(
                df,
                hue=column_name,
                palette="deep",
                size=1.2,
                diag_kind="kde",
                diag_kws=dict(shade=True),
                plot_kws=dict(s=10),
            )

            # Get the number of rows and columns from the seaborn pairplot
            pp_rows = len(pair_plot.axes)
            pp_cols = len(pair_plot.axes[0])

            # Update axes to the corresponding number of subplots from pairplot
            self.axes = self.figure.subplots(pp_rows, pp_cols)
github neurodata / graspy / graspy / plot / plot.py View on Github external
df_labels = pd.DataFrame(labels, columns=[legend_name])
        df = pd.concat([df_labels, df], axis=1)

        names, counts = np.unique(labels, return_counts=True)
        if counts.min() < 2:
            diag_kind = "hist"
    plot_kws = dict(
        alpha=alpha,
        s=size,
        # edgecolor=None, # could add this later
        linewidth=0,
        marker=marker,
    )
    with sns.plotting_context(context=context, font_scale=font_scale):
        if labels is not None:
            pairs = sns.pairplot(
                df,
                hue=legend_name,
                vars=variables,
                height=height,
                palette=palette,
                diag_kind=diag_kind,
                plot_kws=plot_kws,
            )
        else:
            pairs = sns.pairplot(
                df,
                vars=variables,
                height=height,
                palette=palette,
                diag_kind=diag_kind,
                plot_kws=plot_kws,