How to use the pingouin.utils._check_dataframe function in pingouin

To help you get started, we’ve selected a few pingouin examples, based on popular ways this function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github raphaelvallat / pingouin / pingouin / pairwise.py View on GitHub external
           studentized range quantiles." Computational statistics & data
           analysis 31.2 (1999): 147-158.

    Examples
    --------
    Pairwise Games-Howell post-hocs on the pain threshold dataset.

    >>> from pingouin import pairwise_gameshowell, read_dataset
    >>> df = read_dataset('anova')
    >>> pairwise_gameshowell(data=df, dv='Pain threshold',
    ...                      between='Hair color')  # doctest: +SKIP
    '''
    from pingouin.external.qsturng import psturng

    # Check the dataframe
    _check_dataframe(dv=dv, between=between, effects='between', data=data)

    # Reset index (avoid duplicate axis error)
    data = data.reset_index(drop=True)

    # Extract infos
    ng = data[between].nunique()
    grp = data.groupby(between)[dv]
    n = grp.count().values
    gmeans = grp.mean().values
    gvars = grp.var().values

    # Pairwise combinations
    g1, g2 = np.array(list(combinations(np.arange(ng), 2))).T
    mn = gmeans[g1] - gmeans[g2]
    se = np.sqrt(0.5 * (gvars[g1] / n[g1] + gvars[g2] / n[g2]))
    tval = mn / np.sqrt(gvars[g1] / n[g1] + gvars[g2] / n[g2])
github raphaelvallat / pingouin / pingouin / pairwise.py View on GitHub external
    5. Between1 + Between2 + Between1 * Between2

    >>> posthocs = pairwise_ttests(dv='Scores', between=['Group', 'Time'],
    ...                            data=df)

    6. Between1 + Between2, no interaction

    >>> posthocs = df.pairwise_ttests(dv='Scores', between=['Group', 'Time'],
    ...                               interaction=False)
    '''
    from .parametric import ttest
    from .nonparametric import wilcoxon, mwu

    # Safety checks
    _check_dataframe(dv=dv, between=between, within=within, subject=subject,
                     effects='all', data=data)

    assert tail in ['one-sided', 'two-sided', 'greater', 'less']
    assert isinstance(alpha, float), 'alpha must be float.'
    assert nan_policy in ['listwise', 'pairwise']

    # Check if we have multiple between or within factors
    multiple_between = False
    multiple_within = False
    contrast = None

    if isinstance(between, list):
        if len(between) > 1:
            multiple_between = True
            contrast = 'multiple_between'
            assert all([b in data.keys() for b in between])
github raphaelvallat / pingouin / pingouin / plotting.py View on GitHub external
    .. plot::

        >>> from pingouin import read_dataset
        >>> df = read_dataset('mixed_anova').query("Time != 'January'")
        >>> import pingouin as pg
        >>> import matplotlib.pyplot as plt
        >>> fig, ax1 = plt.subplots(1, 1, figsize=(5, 4))
        >>> pg.plot_paired(data=df[df['Group'] == 'Meditation'],
        ...                dv='Scores', within='Time', subject='Subject',
        ...                ax=ax1, boxplot=False,
        ...                colors=['grey', 'grey', 'grey'])  # doctest: +SKIP
    """
    from pingouin.utils import _check_dataframe, remove_rm_na

    # Validate args
    _check_dataframe(data=data, dv=dv, within=within, subject=subject,
                     effects='within')

    # Remove NaN values
    data = remove_rm_na(dv=dv, within=within, subject=subject, data=data)

    # Extract subjects
    subj = data[subject].unique()

    # Extract within-subject level (alphabetical order)
    x_cat = np.unique(data[within])
    assert len(x_cat) == 2, 'Within must have exactly two unique levels.'

    if order is None:
        order = x_cat
    else:
        assert len(order) == 2, 'Order must have exactly two elements.'