How to use the pingouin.utils._flatten_list function in pingouin

To help you get started, we've selected a few examples based on popular ways pingouin.utils._flatten_list is used in public projects.
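
Before looking at the project code, here is a quick sketch of what the helper does. It is not an excerpt from the library: the calls mirror how _flatten_list is used in the snippets below, and the expected outputs (shown as comments) are assumptions that may vary slightly between pingouin versions.

from pingouin.utils import _flatten_list

# Strings, nested lists and None collapse into a flat list of column names
_flatten_list(['x', ['c1', 'c2'], None, 'y'])
# ['x', 'c1', 'c2', 'y']

# Tuples are kept intact by default (handy for pandas MultiIndex columns)...
_flatten_list([('a', 'b'), 'c'])
# [('a', 'b'), 'c']

# ...but are flattened as well when include_tuple=True
_flatten_list([('a', 'b'), 'c'], include_tuple=True)
# ['a', 'b', 'c']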


Example from raphaelvallat/pingouin: pingouin/correlation.py
               n      r         CI95%     r2  adj_r2     p-val  power
    spearman  30  0.429  [0.08, 0.68]  0.184   0.123  0.018092  0.676
    """
    from pingouin.utils import _flatten_list
    # Check arguments
    assert isinstance(data, pd.DataFrame), 'data must be a pandas DataFrame.'
    assert data.shape[0] > 2, 'Data must have at least 3 samples.'
    assert isinstance(x, (str, tuple)), 'x must be a string or tuple.'
    assert isinstance(y, (str, tuple)), 'y must be a string or tuple.'
    assert isinstance(covar, (str, list, type(None)))
    assert isinstance(x_covar, (str, list, type(None)))
    assert isinstance(y_covar, (str, list, type(None)))
    if covar is not None and (x_covar is not None or y_covar is not None):
        raise ValueError('Cannot specify both covar and {x,y}_covar.')
    # Check that columns exist
    col = _flatten_list([x, y, covar, x_covar, y_covar])
    if isinstance(covar, str):
        covar = [covar]
    if isinstance(x_covar, str):
        x_covar = [x_covar]
    if isinstance(y_covar, str):
        y_covar = [y_covar]
    assert all([c in data for c in col]), 'columns are not in dataframe.'
    # Check that columns are numeric
    assert all([data[c].dtype.kind in 'bfi' for c in col]), \
        'columns must be numeric or boolean.'

    # Drop rows with NaN
    data = data[col].dropna()
    assert data.shape[0] > 2, 'Data must have at least 3 non-NaN samples.'

    # Standardize (= no need for an intercept in least-square regression)
    C = (data[col] - data[col].mean(axis=0)) / data[col].std(axis=0)
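
In the regression.py and distribution.py snippets below, _fl refers to the same helper imported under a shorter alias, presumably along the lines of:

from pingouin.utils import _flatten_list as _fl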
Example from raphaelvallat/pingouin: pingouin/regression.py
    assert isinstance(y, str), 'y must be a string.'
    assert isinstance(m, (list, str)), 'Mediator(s) must be a list or string.'
    assert isinstance(covar, (type(None), str, list))
    if isinstance(m, str):
        m = [m]
    n_mediator = len(m)
    assert isinstance(data, pd.DataFrame), 'Data must be a DataFrame.'
    # Check for duplicates
    assert n_mediator == len(set(m)), 'Cannot have duplicate mediators.'
    if isinstance(covar, str):
        covar = [covar]
    if isinstance(covar, list):
        assert len(covar) == len(set(covar)), 'Cannot have duplicate covariates.'
        assert set(m).isdisjoint(covar), 'Mediator cannot be in covar.'
    # Check that columns are in dataframe
    columns = _fl([x, m, y, covar])
    keys = data.columns
    assert all([c in keys for c in columns]), 'Column(s) are not in DataFrame.'
    # Check that columns are numeric
    err_msg = "Columns must be numeric or boolean."
    assert all([data[c].dtype.kind in 'bfi' for c in columns]), err_msg

    # Drop rows with NaN values
    data = data[columns].dropna()
    n = data.shape[0]
    assert n > 5, 'DataFrame must have at least 5 samples (rows).'

    # Check if mediator is binary
    mtype = 'logistic' if all(data[m].nunique() == 2) else 'linear'

    # Name of CI
    ll_name = 'CI[%.1f%%]' % (100 * alpha / 2)
Example from raphaelvallat/pingouin: pingouin/pairwise.py
            grp_fac1 = data.groupby(factors[0], sort=False)[dv]
            grp_fac2 = data.groupby(factors[1], sort=False)[dv]
            grp_both = data.groupby(factors, sort=False)[dv]
            labels_fac1 = grp_fac1.groups.keys()
            labels_fac2 = grp_fac2.groups.keys()
            # comb_fac1 = list(combinations(labels_fac1, 2))
            comb_fac2 = list(combinations(labels_fac2, 2))

            # Pairwise comparisons
            combs_list = list(product(labels_fac1, comb_fac2))
            ncombs = len(combs_list)
            # np.array(combs_list) does not work because of tuples
            # we therefore need to flatten the tuples
            combs = np.zeros(shape=(ncombs, 3), dtype=object)
            for i in range(ncombs):
                combs[i] = _flatten_list(combs_list[i], include_tuple=True)

            # Append empty rows
            idxiter = np.arange(nrows, nrows + ncombs)
            stats = stats.append(pd.DataFrame(columns=stats.columns,
                                 index=idxiter), ignore_index=True)
            # Update other columns
            stats.loc[idxiter, 'Contrast'] = factors[0] + ' * ' + factors[1]
            stats.loc[idxiter, 'Time'] = combs[:, 0]
            stats.loc[idxiter, 'Paired'] = paired
            stats.loc[idxiter, 'Tail'] = tail
            stats.loc[idxiter, 'A'] = combs[:, 1]
            stats.loc[idxiter, 'B'] = combs[:, 2]

            for i, comb in enumerate(combs):
                ic = nrows + i  # Take into account previous rows
                fac1, col1, col2 = comb
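
For context, the flattening step in the loop above can be reproduced on its own. The factor levels below are made up for illustration; only the _flatten_list(..., include_tuple=True) call comes from the snippet.

from itertools import combinations, product
from pingouin.utils import _flatten_list

# Hypothetical two-factor design (levels are not from the original code)
labels_fac1 = ['Pre', 'Post']
comb_fac2 = list(combinations(['A', 'B', 'C'], 2))
combs_list = list(product(labels_fac1, comb_fac2))

# Each ('Pre', ('A', 'B')) pair becomes a flat row ['Pre', 'A', 'B'],
# matching the three columns of the object array filled above
combs = [_flatten_list(c, include_tuple=True) for c in combs_list]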
Example from raphaelvallat/pingouin: pingouin/distribution.py
"""
    # Check arguments
    assert isinstance(dv, str), 'dv must be a string.'
    assert isinstance(subject, str), 'subject must be a string.'
    assert isinstance(within, (str, list)), 'within must be a string or list.'
    # Check that all columns are present
    assert dv in data.columns, '%s not in data' % dv
    assert data[dv].dtype.kind in 'bfi', '%s must be numeric' % dv
    assert subject in data.columns, '%s not in data' % subject
    assert not data[subject].isnull().any(), 'Cannot have NaN in %s' % subject
    if isinstance(within, str):
        within = [within]  # within = ['fac1'] or ['fac1', 'fac2']
    for w in within:
        assert w in data.columns, '%s not in data' % w
    # Keep all relevant columns and reset index
    data = data[_fl([subject, within, dv])]
    # Convert to wide-format + collapse to the mean
    data = pd.pivot_table(data, index=subject, values=dv, columns=within,
                          aggfunc='mean', dropna=True)
    return data
Example from raphaelvallat/pingouin: pingouin/regression.py
    n = data.shape[0]
    assert n > 5, 'DataFrame must have at least 5 samples (rows).'

    # Check if mediator is binary
    mtype = 'logistic' if all(data[m].nunique() == 2) else 'linear'

    # Name of CI
    ll_name = 'CI[%.1f%%]' % (100 * alpha / 2)
    ul_name = 'CI[%.1f%%]' % (100 * (1 - alpha / 2))

    # Compute regressions
    cols = ['names', 'coef', 'se', 'pval', ll_name, ul_name]

    # For speed, we pass np.array instead of pandas DataFrame
    X_val = data[_fl([x, covar])].values  # X + covar as predictors
    XM_val = data[_fl([x, m, covar])].values  # X + M + covar as predictors
    M_val = data[m].values  # M as target (no covariates)
    y_val = data[y].values  # y as target (no covariates)

    # M(j) ~ X + covar
    sxm = {}
    for idx, j in enumerate(m):
        if mtype == 'linear':
            sxm[j] = linear_regression(X_val, M_val[:, idx],
                                       alpha=alpha).loc[[1], cols]
        else:
            sxm[j] = logistic_regression(X_val, M_val[:, idx],
                                         alpha=alpha).loc[[1], cols]
        sxm[j].at[1, 'names'] = '%s ~ X' % j
    sxm = pd.concat(sxm, ignore_index=True)

    # Y ~ M + covar