How to use the linearmodels.compat.pandas.get_codes function in linearmodels

To help you get started, we’ve selected a few linearmodels examples, based on popular ways the get_codes function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github bashtage / linearmodels / linearmodels / panel / data.py View on Github external
def __init__(self, df):
    """
    Build a dense 3-d array view from a MultiIndex DataFrame

    Parameters
    ----------
    df : DataFrame
        Frame whose index exposes ``levels`` and codes for two levels.
        Level 0 supplies the minor axis, level 1 the major axis and the
        columns supply the items.
    """
    self._items = df.columns
    idx = df.index

    def _observed(level):
        # Labels of the requested level that actually occur in df
        return Index(idx.levels[level][get_codes(idx)[level]]).unique()

    self._major_axis = _observed(1)
    self._minor_axis = _observed(0)

    # Every (minor, major) pair, so that reindexing yields a rectangular frame
    self._full_index = MultiIndex.from_product([self._minor_axis, self._major_axis])
    dense = df.reindex(self._full_index)
    dense.index.names = df.index.names
    self._frame = dense

    n_items = len(self._items)
    n_major = len(self._major_axis)
    # Read through the ``minor_axis`` attribute, which is defined outside this block
    n_minor = len(self.minor_axis)
    self._shape = (n_items, n_major, n_minor)

    # Rows of the transposed copy are items; split each row into
    # (minor, major) and then swap so the stored layout is (item, major, minor)
    by_item = np.asarray(dense).copy().T
    stacked = np.reshape(by_item, (n_items, n_minor, n_major))
    self._values = np.swapaxes(stacked, 1, 2)
github bashtage / linearmodels / linearmodels / panel / data.py View on Github external
def entity_ids(self):
    """
    Integer codes of the first index level, as a column vector

    Returns
    -------
    id : ndarray
        2d array with a single column holding the level-0 codes of the
        stored frame's index, one row per row of the frame
    """
    level_codes = get_codes(self._frame.index)
    entity_codes = np.asarray(level_codes[0])
    # Append a trailing axis so the result is 2-d
    return entity_codes[:, None]
github bashtage / linearmodels / linearmodels / iv / absorbing.py View on Github external
Returns
    -------
    cp : Series
        Categorical series containing the cartesian product of the categories
        in cats
    """
    if isinstance(cats, Series):
        return cats

    sizes = []
    for c in cats:
        if not is_categorical(cats[c]):
            raise TypeError('cats must contain only categorical variables')
        col = cats[c]
        max_code = get_codes(col.cat).max()
        size = 1
        while max_code >= 2 ** size:
            size += 1
        sizes.append(size)
    nobs = cats.shape[0]
    total_size = sum(sizes)
    if total_size >= 63:
        raise ValueError('There are too many cats with too many states to use this method.')
    dtype_size = min(filter(lambda v: total_size < (v - 1), (8, 16, 32, 64)))
    dtype_str = 'int{0:d}'.format(dtype_size)
    dtype_val = dtype(dtype_str)
    codes = zeros(nobs, dtype=dtype_val)
    cum_size = 0
    for i, col in enumerate(cats):
        codes += (get_codes(cats[col].cat).astype(dtype_val) << SCALAR_DTYPES[dtype_str](cum_size))
        cum_size += sizes[i]
github bashtage / linearmodels / linearmodels / iv / absorbing.py View on Github external
max_code = get_codes(col.cat).max()
        size = 1
        while max_code >= 2 ** size:
            size += 1
        sizes.append(size)
    nobs = cats.shape[0]
    total_size = sum(sizes)
    if total_size >= 63:
        raise ValueError('There are too many cats with too many states to use this method.')
    dtype_size = min(filter(lambda v: total_size < (v - 1), (8, 16, 32, 64)))
    dtype_str = 'int{0:d}'.format(dtype_size)
    dtype_val = dtype(dtype_str)
    codes = zeros(nobs, dtype=dtype_val)
    cum_size = 0
    for i, col in enumerate(cats):
        codes += (get_codes(cats[col].cat).astype(dtype_val) << SCALAR_DTYPES[dtype_str](cum_size))
        cum_size += sizes[i]
    return Series(Categorical(codes), index=cats.index)
github bashtage / linearmodels / linearmodels / iv / absorbing.py View on Github external
def hash(self):
    """
    Collect digests of every component held by this object

    Returns
    -------
    hashes : tuple
        Sorted tuple of entries. Categorical columns, continuous columns
        and weights each contribute a one-element tuple containing a hex
        digest; interactions contribute whatever their own ``hash``
        attribute yields.
    """
    digests = []
    hasher = hash_func()

    if self._cat is not None:
        for name in self._cat:
            # Categorical columns are hashed through their integer codes
            codes = to_numpy(get_codes(self._cat[name].cat))
            hasher.update(ascontiguousarray(codes.data))
            digests.append((hasher.hexdigest(),))
            hasher = _reset(hasher)

    if self._cont is not None:
        for name in self._cont:
            values = to_numpy(self._cont[name])
            hasher.update(ascontiguousarray(values.data))
            digests.append((hasher.hexdigest(),))
            hasher = _reset(hasher)

    if self._interactions is not None:
        for interaction in self._interactions:
            digests.extend(interaction.hash)

    # Add weight hash if provided, using a newly created hasher
    if self._weights is not None:
        hasher = hash_func()
        hasher.update(ascontiguousarray(self._weights.data))
        digests.append((hasher.hexdigest(),))

    # Sorting makes the result independent of the order components were visited
    return tuple(sorted(digests))
github bashtage / linearmodels / linearmodels / panel / data.py View on Github external
def time_ids(self):
    """
    Integer codes of the second index level, as a column vector

    Returns
    -------
    id : ndarray
        2d array with a single column holding the level-1 codes of the
        stored frame's index, one row per row of the frame
    """
    level_codes = get_codes(self._frame.index)
    time_codes = np.asarray(level_codes[1])
    # Append a trailing axis so the result is 2-d
    return time_codes[:, None]
github bashtage / linearmodels / linearmodels / iv / absorbing.py View on Github external
"""
    Parameters
    ----------
    cat : Series
        Categorical series to convert to dummy variables
    cont : {Series, DataFrame}
        Continuous variable values to use in the dummy interaction
    precondition : bool
        Flag whether dummies should be preconditioned

    Returns
    -------
    interact : csc_matrix
        Sparse matrix of dummy interactions with unit column norm
    """
    codes = get_codes(category_product(cat).cat)
    interact = csc_matrix((to_numpy(cont).flat, (arange(codes.shape[0]), codes)))
    if not precondition:
        return interact
    else:
        return preconditioner(interact)[0]
github bashtage / linearmodels / linearmodels / panel / data.py View on Github external
def time(self):
    """List of the distinct level-1 (time) index labels present in the frame"""
    frame_index = self._frame.index
    time_codes = get_codes(frame_index)[1]
    # Map each row's code back to its label, then drop repeats
    observed = frame_index.levels[1][time_codes]
    return list(observed.unique())