How to use the pingouin.remove_rm_na function in pingouin

To help you get started, we’ve selected a few pingouin examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github raphaelvallat / pingouin / pingouin / nonparametric.py View on Github external
--------
    Compute the Cochran Q test for repeated measurements.

    >>> from pingouin import cochran, read_dataset
    >>> df = read_dataset('cochran')
    >>> cochran(data=df, dv='Energetic', within='Time', subject='Subject')
            Source  dof      Q     p-unc
    cochran   Time    2  6.706  0.034981
    """
    # Check data
    _check_dataframe(dv=dv, within=within, data=data, subject=subject,
                     effects='within')

    # Remove NaN
    if data[dv].isnull().any():
        data = remove_rm_na(dv=dv, within=within, subject=subject,
                            data=data[[subject, within, dv]])

    # Groupby and extract size
    grp = data.groupby(within)[dv]
    grp_s = data.groupby(subject)[dv]
    k = data[within].nunique()
    dof = k - 1
    # n = grp.count().unique()[0]

    # Q statistic and p-value
    q = (dof * (k * np.sum(grp.sum()**2) - grp.sum().sum()**2)) / \
        (k * grp.sum().sum() - np.sum(grp_s.sum()**2))
    p_unc = scipy.stats.chi2.sf(q, dof)

    # Create output dataframe
    stats = pd.DataFrame({'Source': within,
github raphaelvallat / pingouin / pingouin / parametric.py View on Github external
'ddof2' : Degrees of freedom (denominator)
        'F' : F-value
        'p-unc' : Uncorrected p-value
        'np2' : Partial eta-squared effect size
        'eps' : Greenhouse-Geisser epsilon factor (= index of sphericity)
        'p-GG-corr' : Greenhouse-Geisser corrected p-value
    """
    a, b = within

    # Validate the dataframe
    _check_dataframe(dv=dv, within=within, data=data, subject=subject,
                     effects='within')

    # Remove NaN
    if data[[subject, a, b, dv]].isnull().any().any():
        data = remove_rm_na(dv=dv, subject=subject, within=[a, b],
                            data=data[[subject, a, b, dv]])

    # Collapse to the mean (note that this is also done in remove_rm_na)
    data = data.groupby([subject, a, b]).mean().reset_index()

    assert not data[a].isnull().any(), 'Cannot have NaN in %s' % a
    assert not data[b].isnull().any(), 'Cannot have NaN in %s' % b
    assert not data[subject].isnull().any(), 'Cannot have NaN in %s' % subject

    # Group sizes and grandmean
    n_a = data[a].nunique()
    n_b = data[b].nunique()
    n_s = data[subject].nunique()
    mu = data[dv].mean()

    # Groupby means
github raphaelvallat / pingouin / pingouin / parametric.py View on Github external
assert data.shape[0] > 2, 'Data must have at least 3 rows.'
        assert data.shape[1] > 1, 'Data must contain at least two columns.'
        data['Subj'] = np.arange(data.shape[0])
        data = data.melt(id_vars='Subj', var_name='Within', value_name='DV')
        subject, within, dv = 'Subj', 'Within', 'DV'

    # Check dataframe
    _check_dataframe(dv=dv, within=within, data=data, subject=subject,
                     effects='within')

    # Collapse to the mean
    data = data.groupby([subject, within]).mean().reset_index()

    # Remove NaN
    if data[dv].isnull().any():
        data = remove_rm_na(dv=dv, within=within, subject=subject,
                            data=data[[subject, within, dv]])
    assert not data[within].isnull().any(), 'Cannot have NaN in `within`.'
    assert not data[subject].isnull().any(), 'Cannot have NaN in `subject`.'

    # Groupby
    grp_with = data.groupby(within)[dv]
    rm = list(data[within].unique())
    n_rm = len(rm)
    n_obs = int(data.groupby(within)[dv].count().max())
    grandmean = data[dv].mean()

    # Calculate sums of squares
    sstime = ((grp_with.mean() - grandmean)**2 * grp_with.count()).sum()
    sswithin = grp_with.apply(lambda x: (x - x.mean())**2).sum()
    grp_subj = data.groupby(subject)[dv]
    sssubj = n_rm * np.sum((grp_subj.mean() - grandmean)**2)
github raphaelvallat / pingouin / pingouin / nonparametric.py View on Github external
>>> df = read_dataset('rm_anova')
    >>> friedman(data=df, dv='DesireToKill', within='Disgustingness',
    ...          subject='Subject')
                      Source  ddof1      Q     p-unc
    Friedman  Disgustingness      1  9.228  0.002384
    """
    # Check data
    _check_dataframe(dv=dv, within=within, data=data, subject=subject,
                     effects='within')

    # Collapse to the mean
    data = data.groupby([subject, within]).mean().reset_index()

    # Remove NaN
    if data[dv].isnull().any():
        data = remove_rm_na(dv=dv, within=within, subject=subject,
                            data=data[[subject, within, dv]])

    # Extract number of groups and total sample size
    grp = data.groupby(within)[dv]
    rm = list(data[within].unique())
    k = len(rm)
    X = np.array([grp.get_group(r).values for r in rm]).T
    n = X.shape[0]

    # Rank per subject
    ranked = np.zeros(X.shape)
    for i in range(n):
        ranked[i] = scipy.stats.rankdata(X[i, :])

    ssbn = (ranked.sum(axis=0)**2).sum()
github raphaelvallat / pingouin / pingouin / parametric.py View on Github external
>>> aov
            Source     SS  DF1  DF2     MS      F     p-unc    np2    eps
    0        Group  5.460    1   58  5.460  5.052  0.028420  0.080      -
    1         Time  7.628    2  116  3.814  4.027  0.020373  0.065  0.999
    2  Interaction  5.168    2  116  2.584  2.728  0.069530  0.045      -
    """
    # Check data
    _check_dataframe(dv=dv, within=within, between=between, data=data,
                     subject=subject, effects='interaction')

    # Collapse to the mean
    data = data.groupby([subject, within, between]).mean().reset_index()

    # Remove NaN
    if data[dv].isnull().any():
        data = remove_rm_na(dv=dv, within=within, subject=subject,
                            data=data[[subject, within, between, dv]])

    # SUMS OF SQUARES
    grandmean = data[dv].mean()
    # Extract main effects of time and between
    mtime = rm_anova(dv=dv, within=within, subject=subject, data=data,
                     correction=correction, detailed=True)
    mbetw = anova(dv=dv, between=between, data=data, detailed=True)
    # Extract SS total, residuals and interactions
    grp = data.groupby([between, within])[dv]
    sstotal = grp.apply(lambda x: (x - grandmean)**2).sum()
    # sst = residuals within + residuals between
    sst = grp.apply(lambda x: (x - x.mean())**2).sum()
    # Interaction
    ssinter = sstotal - (sst + mtime.at[0, 'SS'] + mbetw.at[0, 'SS'])
    sswg = mtime.at[1, 'SS'] - ssinter