How to use the pingouin.utils.remove_na function in pingouin

To help you get started, we’ve selected a few pingouin examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github raphaelvallat / pingouin / pingouin / multivariate.py View on Github external
x = x[~np.isnan(x).any(axis=1)]
    else:
        nx, kx = x.shape
        y = np.asarray(Y)
        if y.ndim == 1:
            # One sample with specified null
            assert y.size == kx
        elif y.ndim == 2:
            # Two-sample
            err = 'X and Y must have the same number of features (= columns).'
            assert y.shape[1] == kx, err
            if paired:
                err = 'X and Y must have the same number of rows if paired.'
                assert y.shape[0] == nx, err
        # Remove rows with missing values in both x and y
        x, y = remove_na(x, y, paired=paired, axis='rows')

    # Shape of arrays
    nx, k = x.shape
    ny = y.shape[0]
    assert nx >= 5, 'At least five samples are required.'

    if y.ndim == 1 or paired is True:
        n = nx
        if y.ndim == 1:
            # One sample test
            cov = np.cov(x, rowvar=False)
            diff = x.mean(0) - y
        else:
            # Paired two sample
            cov = np.cov(x - y, rowvar=False)
            diff = x.mean(0) - y.mean(0)
github raphaelvallat / pingouin / pingouin / correlation.py View on Github external
>>> import pandas as pd
    >>> data = pd.DataFrame({'x': x, 'y': y})
    >>> corr(data['x'], data['y'])
              n      r          CI95%     r2  adj_r2     p-val   BF10  power
    pearson  30  0.147  [-0.23, 0.48]  0.022  -0.051  0.439148  0.302  0.121
    """
    x = np.asarray(x)
    y = np.asarray(y)

    # Check size
    if x.size != y.size:
        raise ValueError('x and y must have the same length.')

    # Remove NA
    x, y = remove_na(x, y, paired=True)
    nx = x.size

    # Compute correlation coefficient
    if method == 'pearson':
        r, pval = pearsonr(x, y)
    elif method == 'spearman':
        r, pval = spearmanr(x, y)
    elif method == 'kendall':
        r, pval = kendalltau(x, y)
    elif method == 'percbend':
        r, pval = percbend(x, y)
    elif method == 'shepherd':
        r, pval, outliers = shepherd(x, y)
    elif method == 'skipped':
        r, pval, outliers = skipped(x, y, method='spearman')
    else:
github raphaelvallat / pingouin / pingouin / regression.py View on Github external
else:
        names = []

    # Convert to numpy array
    X = np.asarray(X)
    y = np.asarray(y)
    assert y.ndim == 1, 'y must be one-dimensional.'
    assert 0 < alpha < 1, 'alpha must be between 0 and 1.'

    # Add axis if only one-dimensional array
    if X.ndim == 1:
        X = X[..., np.newaxis]

    # Check for NaN /  Inf
    if remove_na:
        X, y = rm_na(X, y[..., np.newaxis], paired=True, axis='rows')
        y = np.squeeze(y)
    y_gd = np.isfinite(y).all()
    X_gd = np.isfinite(X).all()
    assert y_gd, ("Target (y) contains NaN or Inf. Please remove them "
                  "manually or use remove_na=True.")
    assert X_gd, ("Predictors (X) contain NaN or Inf. Please remove them "
                  "manually or use remove_na=True.")

    # Check that X and y have same length
    assert y.shape[0] == X.shape[0], 'X and y must have same number of samples'

    # Check that y is binary
    if np.unique(y).size != 2:
        raise ValueError('Dependent variable must be binary.')

    if not names:
github raphaelvallat / pingouin / pingouin / distribution.py View on Github external
>>> pg.normality(data, dv='Performance', group='Time')
                 W      pval  normal
    Pre   0.967718  0.478773    True
    Post  0.940728  0.095157    True
    """
    assert isinstance(data, (pd.DataFrame, pd.Series, list, np.ndarray))
    assert method in ['shapiro', 'normaltest']
    if isinstance(data, pd.Series):
        data = data.to_frame()
    col_names = ['W', 'pval']
    func = getattr(scipy.stats, method)
    if isinstance(data, (list, np.ndarray)):
        data = np.asarray(data)
        assert data.ndim == 1, 'Data must be 1D.'
        assert data.size > 3, 'Data must have more than 3 samples.'
        data = remove_na(data)
        stats = pd.DataFrame(func(data)).T
        stats.columns = col_names
        stats['normal'] = np.where(stats['pval'] > alpha, True, False)
    else:
        # Data is a Pandas DataFrame
        if dv is None and group is None:
            # Wide-format
            # Get numeric data only
            numdata = data._get_numeric_data()
            stats = numdata.apply(lambda x: func(x.dropna()),
                                  result_type='expand', axis=0).T
            stats.columns = col_names
            stats['normal'] = np.where(stats['pval'] > alpha, True, False)
        else:
            # Long-format
            stats = pd.DataFrame([])
github raphaelvallat / pingouin / pingouin / regression.py View on Github external
else:
        names = []

    # Convert input to numpy array
    X = np.asarray(X)
    y = np.asarray(y)
    assert y.ndim == 1, 'y must be one-dimensional.'
    assert 0 < alpha < 1

    if X.ndim == 1:
        # Convert to (n_samples, n_features) shape
        X = X[..., np.newaxis]

    # Check for NaN / Inf
    if remove_na:
        X, y = rm_na(X, y[..., np.newaxis], paired=True, axis='rows')
        y = np.squeeze(y)
    y_gd = np.isfinite(y).all()
    X_gd = np.isfinite(X).all()
    assert y_gd, ("Target (y) contains NaN or Inf. Please remove them "
                  "manually or use remove_na=True.")
    assert X_gd, ("Predictors (X) contain NaN or Inf. Please remove them "
                  "manually or use remove_na=True.")

    # Check that X and y have same length
    assert y.shape[0] == X.shape[0], 'X and y must have same number of samples'

    if not names:
        names = ['x' + str(i + 1) for i in range(X.shape[1])]

    if add_intercept:
        # Add intercept