Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_width_ratios_matrix(self):
    """Draw ``msno.matrix`` for ``self.simple_df`` with ``width_ratios=(30, 1)``.

    Returns:
        The current pyplot figure, as reported by ``plt.gcf()``.
    """
    matrix_options = {"width_ratios": (30, 1)}
    msno.matrix(self.simple_df, **matrix_options)
    current_figure = plt.gcf()
    return current_figure
def test_no_sparkline_matrix(self):
    """Draw ``msno.matrix`` for ``self.simple_df`` with ``sparkline=False``.

    Returns:
        The current pyplot figure, as reported by ``plt.gcf()``.
    """
    matrix_options = {"sparkline": False}
    msno.matrix(self.simple_df, **matrix_options)
    current_figure = plt.gcf()
    return current_figure
def test_simple_matrix(self):
    """Draw ``msno.matrix`` for ``self.simple_df`` using only default options.

    Returns:
        The current pyplot figure, as reported by ``plt.gcf()``.
    """
    frame = self.simple_df
    msno.matrix(frame)
    current_figure = plt.gcf()
    return current_figure
def test_freq_matrix(self):
    """Draw ``msno.matrix`` for ``self.freq_df`` with ``freq='BQ'``.

    Returns:
        The current pyplot figure, as reported by ``plt.gcf()``.
    """
    matrix_options = {"freq": 'BQ'}
    msno.matrix(self.freq_df, **matrix_options)
    current_figure = plt.gcf()
    return current_figure
def test_fontsize_matrix(self):
    """Draw ``msno.matrix`` for ``self.simple_df`` with ``fontsize=8``.

    Returns:
        The current pyplot figure, as reported by ``plt.gcf()``.
    """
    matrix_options = {"fontsize": 8}
    msno.matrix(self.simple_df, **matrix_options)
    current_figure = plt.gcf()
    return current_figure
def test_large_matrix(self):
    """Draw ``msno.matrix`` for ``self.large_df`` using only default options.

    Returns:
        The current pyplot figure, as reported by ``plt.gcf()``.
    """
    frame = self.large_df
    msno.matrix(frame)
    current_figure = plt.gcf()
    return current_figure
def plot_md_locations(data, **kwargs):
    """Plot the locations where data is missing within a DataFrame.

    Args:
        data (pd.DataFrame): DataFrame to plot.
        **kwargs: Keyword arguments for plot. Forwarded unchanged to both
            ``_default_plot_args`` and ``missingno.matrix``.

    Returns:
        Whatever ``missingno.matrix`` returns for this call: the missingness
        location plot (a matplotlib Axes in recent missingno releases;
        NOTE(review): confirm against the installed missingno version).

    Raises:
        TypeError: if data is not a DataFrame. NOTE(review): the original
            docstring says this is raised through a decorator; no decorator
            is visible on this definition here, so confirm it is applied.
    """
    # The return value of the helper is not used here; presumably it applies
    # plotting defaults derived from kwargs -- verify against its definition.
    _default_plot_args(**kwargs)
    # Previously the plot object was dropped and the function returned None,
    # contradicting the documented return value. Hand it back to the caller.
    return msno.matrix(data, **kwargs)
# Load the sleep dataset from a path relative to the working directory.
sleep1 = pd.read_csv('data/sleep.csv')
# Bare expression: its value is only shown in an interactive/notebook session.
sleep1.head()
# Work on a copy so the freshly loaded frame `sleep1` stays untouched.
sleep = sleep1.copy()
# Heatmap of the boolean null mask (colour bar disabled).
sns.heatmap(sleep.isnull(), cbar=False)
# Author's observation: NonD, Dream, Sleep, Span, Gest have missing values.
# Per-column count of missing values.
sleep.isna().sum()
#
# Requires the missingno package: pip install missingno
import missingno as msno
# Nullity matrix of the whole frame.
msno.matrix(sleep)
# In addition to the matrix itself, this diagram has a bar on its right side:
# a line plot summarising the data completeness of each row.
# Nullity correlation heatmap.
msno.heatmap(sleep)
# missingno.heatmap visualizes the correlation matrix of the locations of
# missing values across columns.
#%%
# Summarise missingness per column and draw it as a bar chart.
dataset = sleep.copy()
# Compute the null mask once and reuse it for both statistics.
null_mask = dataset.isnull()
# Number of missing values per column, largest first.
total = null_mask.sum().sort_values(ascending=False)
# Share of missing values per column (missing / row count), largest first.
# NOTE: these are fractions in [0, 1]; they are not multiplied by 100 even
# though the column and axis labels say "Percent".
percent = (null_mask.sum() / null_mask.count()).sort_values(ascending=False)
missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
f, ax = plt.subplots(figsize=(15, 6))
# rotation must be a number (or 'vertical' / 'horizontal'); the string '90'
# used previously is rejected with ValueError by current Matplotlib.
plt.xticks(rotation=90)
sns.barplot(x=missing_data.index, y=missing_data['Percent'])
plt.xlabel('Features', fontsize=15)
plt.ylabel('Percent of missing values', fontsize=15)
plt.title('Percent missing data by feature', fontsize=15)
# Bare expression: shows the top of the summary table in a notebook session.
missing_data.head()