How to use linearmodels - 10 common examples

To help you get started, we’ve selected a few linearmodels examples that reflect popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github bashtage / linearmodels / experiment.py View on Github external
std_errs = {}
        std_errs_no = {}
        std_errs_u = {}
        std_errs_u_no = {}
        std_errs_r = {}
        std_errs_r_no = {}
        vals = np.zeros((NUM_REPS, 5, 7))
        for b in range(NUM_REPS):
            if b % 25 == 0:
                print(key, n, b)
            data = generate_data(0.00, 'pandas', ntk=(n, 3, 5), other_effects=1, const=False, rng=rs)
            mo, fo = options[key]

            mod_type, cluster_type = key.split(':')

            y = PanelData(data.y)
            random_effects = np.random.randint(0, n // 3, size=y.dataframe.shape)
            other_random = np.random.randint(0, n // 5, size=y.dataframe.shape)

            if mod_type == 'random':
                effects = y.copy()
                effects.dataframe.iloc[:, :] = random_effects
                mo['other_effects'] = effects

            if cluster_type in ('random', 'other-random', 'entity-nested', 'random-nested'):
                clusters = y.copy()
                if cluster_type == 'random':
                    clusters.dataframe.iloc[:, :] = random_effects
                elif cluster_type == 'other-random':
                    clusters.dataframe.iloc[:, :] = other_random
                elif cluster_type == 'entity_nested':
                    eid = y.entity_ids
github bashtage / linearmodels / linearmodels / system / model.py View on Github external
# Reshape each stacked residual vector into k equal blocks and transpose, so
# every block becomes one column (presumably one column per equation).
gls_eps = np.reshape(gls_eps, (k, gls_eps.shape[0] // k)).T
        eps = np.reshape(eps, (k, eps.shape[0] // k)).T
        # ``cov_est`` is rebound here: it goes in as the callable and comes out
        # as the object it returns, whose ``cov`` attribute is read below.
        cov_est = cov_est(self._wxhat, gls_eps, sigma, full_sigma, gls=True,
                          constraints=self._constraints, **cov_config)
        cov = cov_est.cov

        # Repackage results for individual equations
        individual = AttrDict()
        debiased = cov_config.get('debiased', False)
        method = 'Iterative GLS' if iter_count > 1 else 'GLS'
        for i in range(k):
            cons = int(self.has_constant.iloc[i])

            if cons:
                # Subtract from wy[i] its least-squares fit on sqrt(w[i]).
                c = np.sqrt(self._w[i])
                ye = self._wy[i] - c @ lstsq(c, self._wy[i])[0]
            else:
                ye = self._wy[i]
            # Sum of squares of ``ye``, passed on as the total sum of squares.
            total_ss = float(ye.T @ ye)
            stats = self._common_indiv_results(i, beta, cov, gls_eps, eps,
                                               method, cov_type, cov_est, iter_count,
                                               debiased, cons, total_ss)
            key = self._eq_labels[i]
            individual[key] = stats

        # Populate results dictionary
        # nobs counts every element of the reshaped residual array.
        nobs = eps.size
        results = self._common_results(beta, cov, method, iter_count, nobs,
                                       cov_type, sigma, individual, debiased)

        # wresid is different between GLS and OLS
        wresid = []
github bashtage / linearmodels / experiment3.py View on Github external
import numpy as np
from linearmodels.panel.lsmr.lsmr import LSMR
from scipy.sparse.linalg import lsmr
from scipy.sparse import csr_matrix
from linearmodels.panel.utility import dummy_matrix
from numpy import sqrt, finfo
import pstats, cProfile
from timer_cm import Timer

# Seeded generator so the simulated data below are reproducible.
rs = np.random.RandomState(1234)
# Number of simulated observations.
m = 2000000
# Two integer-coded columns with m // 3 and m // 20 possible values.
c1 = rs.randint(0, m // 3, m)
c2 = rs.randint(0, m // 20, m)
# Response built from scaled codes plus standard normal noise.
y = c1 / 10000 + c2 / 1000 + rs.randn(m)
# Machine epsilon for double precision; its square root sets the tolerances.
eps = finfo(np.double).eps
d = dummy_matrix(np.column_stack([c1, c2]))  # type: scipy.sparse.csc.csc_matrix

# Solve the sparse least-squares problem with SciPy's LSMR; element [0] of the
# returned tuple is the solution vector.
b = lsmr(d, y, atol=sqrt(eps), btol=sqrt(eps), conlim=1 / (10 * sqrt(eps)), show=True)[0]
#with Timer('scipy'):
github bashtage / linearmodels / linearmodels / iv / results.py View on Github external
# Local import of IVGMM (presumably to avoid a circular import -- confirm).
from linearmodels.iv import IVGMM
        # Fit a GMM model built from ``exog_e``/``endog_e`` (defined outside this
        # fragment) with this result's covariance settings; keep its J statistic.
        mod = IVGMM(dependent, exog_e, endog_e, instruments)
        res_e = mod.fit(cov_type=self.cov_type, **self.cov_config)
        j_e = res_e.j_stat.stat

        x = self.model._x
        y = self.model._y
        z = self.model._z
        # Keep only the leading nz-by-nz block of the fitted weight matrix,
        # where nz is the number of columns in this model's ``_z``.
        nz = z.shape[1]
        weight_mat_c = res_e.weight_matrix.values[:nz, :nz]
        # Re-estimate on this model's own arrays with that sub-matrix and
        # evaluate this model's J statistic at those parameters.
        params_c = mod.estimate_parameters(x, y, z, weight_mat_c)
        j_c = self.model._j_statistic(params_c, weight_mat_c).stat

        # The statistic is the difference of the two J statistics; df is the
        # difference in the number of exog columns.
        stat = j_e - j_c
        df = exog_e.shape[1] - exog.shape[1]
        return WaldTestStatistic(stat, null, df, name='C-statistic')
github bashtage / linearmodels / linearmodels / iv / absorbing.py View on Github external
a 256 bit value. This allows variables to be reused in different
        models if the set of absorbing variables and interactions is held
        constant.

        See also
        --------
        linearmodels.iv.covariance.HomoskedasticCovariance
        linearmodels.iv.covariance.HeteroskedasticCovariance
        linearmodels.iv.covariance.KernelCovariance
        linearmodels.iv.covariance.ClusteredCovariance
        """

        # Run the first-time setup only if no absorbed dependent is stored yet.
        if self._absorbed_dependent is None:
            self._first_time_fit(use_cache, lsmr_options)

        # The absorbed regressors are also stored on the instance as ``_x``.
        self._x = exog_resid = to_numpy(self.absorbed_exog)
        dep_resid = to_numpy(self.absorbed_dependent)
        if self._exog.shape[1] == 0:
            # No regressors: the parameter vector is an empty (0, 1) array.
            params = empty((0, 1))
        else:
            if exog_resid.shape[1]:
                check_absorbed(exog_resid, self.exog.cols)
            params = lstsq(exog_resid, dep_resid)[0]
            # NOTE(review): this adds to ``_num_params`` on every pass through
            # this code -- confirm it is reset if fitting can be repeated.
            self._num_params += exog_resid.shape[1]

        cov_estimator = COVARIANCE_ESTIMATORS[cov_type]
        # These two entries are written into the caller-supplied ``cov_config``.
        cov_config['debiased'] = debiased
        cov_config['kappa'] = 0.0
        # Shallow copy without 'center', so that key is not passed on below.
        cov_config_copy = {k: v for k, v in cov_config.items()}
        if 'center' in cov_config_copy:
            del cov_config_copy['center']
        # ``cov_estimator`` is rebound from the looked-up callable to its result;
        # ``exog_resid`` is passed as both the first and third argument.
        cov_estimator = cov_estimator(exog_resid, dep_resid, exog_resid, params, **cov_config_copy)
github bashtage / linearmodels / linearmodels / system / model.py View on Github external
def _multivariate_ls_fit(self):
        """
        Estimate the stacked parameter vector and the residuals.

        Returns
        -------
        beta : ndarray
            Parameters returned by ``_parameters_from_xprod`` for the
            cross-products of ``_wxhat`` and ``_wy``, subject to
            ``self.constraints``.
        eps : ndarray
            Residuals ``wy[i] - wx[i] @ b_i`` for each i, joined with
            ``np.hstack``.
        """
        wy, wx, wxhat = self._wy, self._wx, self._wxhat
        num_eq = len(wxhat)

        # Cross-products: x'x via the blocked helper with an identity matrix,
        # x'y as the vertical stack of the per-index products.
        xpx = blocked_inner_prod(wxhat, np.eye(num_eq))
        xpy = np.vstack([wxhat[i].T @ wy[i] for i in range(num_eq)])
        beta = _parameters_from_xprod(xpx, xpy, constraints=self.constraints)

        # Consecutive slices of beta are matched to each wx[i] by column count.
        resids = []
        start = 0
        for i in range(num_eq):
            stop = start + wx[i].shape[1]
            resids.append(wy[i] - wx[i] @ beta[start:stop])
            start = stop

        return beta, np.hstack(resids)
github bashtage / linearmodels / linearmodels / _version.py View on Github external
# Fragment of a keyword-based version lookup; ``keywords``, ``verbose``,
# ``NotThisMethod`` and ``re`` come from the enclosing scope.
if not keywords:
        raise NotThisMethod("no keywords at all, weird")
    date = keywords.get("date")
    if date is not None:
        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
        # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
        # -like" string, which we must then edit to make compliant), because
        # it's been around since git-1.5.3, and it's too difficult to
        # discover which version we're using, or to work around using an
        # older one.
        # First space becomes "T", second space is dropped, e.g.
        # "2015-01-01 12:00:00 +0100" -> "2015-01-01T12:00:00+0100".
        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    # Drop the surrounding parentheses and split the comma-separated ref list.
    refs = set([r.strip() for r in refnames.strip("()").split(",")])
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = set([r for r in refs if re.search(r'\d', r)])
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs - tags))
github bashtage / linearmodels / linearmodels / _version.py View on Github external
def git_versions_from_keywords(keywords, tag_prefix, verbose):
    """Get version information from git keywords.

    Parameters
    ----------
    keywords
        Mapping-like object read with ``.get("date")`` and ``["refnames"]``;
        the "date" entry may be absent.
    tag_prefix
        Not referenced in the portion of the function shown here.
    verbose
        When truthy, a diagnostic is printed before unexpanded keywords are
        rejected.

    Raises
    ------
    NotThisMethod
        If ``keywords`` is empty, or if the "refnames" value still starts
        with the unexpanded "$Format" placeholder.
    """
    if not keywords:
        raise NotThisMethod("no keywords at all, weird")
    date = keywords.get("date")
    if date is not None:
        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
        # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
        # -like" string, which we must then edit to make compliant), because
        # it's been around since git-1.5.3, and it's too difficult to
        # discover which version we're using, or to work around using an
        # older one.
        # The first space becomes "T" and the next space is removed.
        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    # Drop the surrounding parentheses and split the comma-separated ref list.
    refs = set([r.strip() for r in refnames.strip("()").split(",")])
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
github bashtage / linearmodels / experiment-2.py View on Github external
# Scratch experiment. ``a``, ``x`` and ``z`` are defined before this fragment.
b = (x.T @ z) @ (x.T @ z).T
# The bare expressions and literals below have no effect when run as a script
# (presumably left over from an interactive session).
a
b
np.linalg.inv(a) @ b
np.trace(np.linalg.inv(a) @ b)
30
30

data = generate_data(0, 'pandas', ntk=(101, 3, 5), other_effects=1, const=False)

# Wrap the generated dependent, regressors and weights as PanelData.
y = PanelData(data.y)
x = PanelData(data.x)
w = PanelData(data.w)

# Overwrite the first regressor column with ones.
x.dataframe.iloc[:, 0] = 1
# Fit four specifications (no effects, entity, time, both); the fit results
# are discarded. Only the entity-effects model uses the wrapped ``y``/``x``.
mod = PanelOLS(data.y, data.x, weights=data.w)
mod.fit()
mod = PanelOLS(y, x, weights=data.w, entity_effects=True)
mod.fit()
mod = PanelOLS(data.y, data.x, weights=data.w, time_effects=True)
mod.fit()
mod = PanelOLS(data.y, data.x, weights=data.w, time_effects=True, entity_effects=True)
mod.fit()

# Flag rows that are null in any of y, x or w, then drop them from each.
# NOTE(review): the return values of ``drop`` are ignored, so this assumes
# ``drop`` modifies the object in place -- confirm.
missing = y.isnull | x.isnull | w.isnull
y.drop(missing)
x.drop(missing)
w.drop(missing)

# Same assignment as above, repeated after dropping rows.
x.dataframe.iloc[:, 0] = 1
# Demean y and x using the weights in ``w``.
ydw = y.demean(weights=w)
xdw = x.demean(weights=w)
github bashtage / linearmodels / experiment-2.py View on Github external
# Scratch experiment. ``lm`` and ``np`` are bound before this fragment.
# Turn the missing-data warning flag off, then call the warning helper.
lm.WARN_ON_MISSING = False
from linearmodels import utility
utility.missing_warning(np.array([True, True, False]))

from linearmodels.panel import PanelOLS, RandomEffects, PooledOLS, FamaMacBeth
from linearmodels.datasets import wage_panel
import statsmodels.api as sm
# Load the wage panel and index it by ('nr', 'year').
data = wage_panel.load()
data = data.set_index(['nr','year'])
dependent = data.lwage
exog = sm.add_constant(data[['expersq','married','union']])
out = FamaMacBeth(dependent, exog).fit()
print(out)

# NOTE(review): this unconditional raise stops the script here, so nothing
# below it runs -- confirm whether it is a deliberate stop marker.
raise NotImplementedError
# Two-way effects model fitted with two covariance types.
mod = PanelOLS(dependent, exog, entity_effects=True, time_effects=True)
res = mod.fit(cov_type='unadjusted')
res2 = mod.fit(cov_type='robust')
# Add 'exper' and refit with entity effects only and a clustered covariance.
exog = sm.add_constant(data[['exper', 'expersq','married','union']])
mod = PanelOLS(dependent, exog, entity_effects=True)
res3 = mod.fit(cov_type='clustered',cluster_entity=True)
mod = RandomEffects(dependent, exog)
res4 = mod.fit(cov_type='robust')
from linearmodels.panel.results import compare

# Pooled model with 'year' added as a categorical column; the copy keeps the
# new column out of ``data``.
exog = sm.add_constant(data[['exper', 'expersq','married','union']].copy())
import pandas as pd
exog['year'] = pd.Categorical(data.reset_index()['year'])
mod = PooledOLS(dependent, exog)
res5 = mod.fit(cov_type='robust')
# Print a comparison of the five fits.
print(compare([res,res2, res3, res4, res5]))