How to use the statsmodels.regression.linear_model.OLS function in statsmodels

To help you get started, we’ve selected a few statsmodels examples that show popular ways OLS is used in public projects.
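
Before diving into the project snippets below, here is a minimal, self-contained sketch of the basic call pattern (the data is synthetic and purely illustrative):

import numpy as np
import statsmodels.api as sm

# Illustrative data: y depends linearly on x plus noise.
rng = np.random.default_rng(0)
x = rng.normal(size=100)
y = 2.0 * x + 1.0 + rng.normal(scale=0.5, size=100)

# OLS does not add an intercept by itself; add_constant prepends a column of ones.
X = sm.add_constant(x)
results = sm.OLS(y, X).fit()
print(results.params)    # [const, slope] estimates
print(results.summary())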


github dgwozdz / HN_SO_analysis / codes / grangercausalitytests_mod.py
# create lagmat of both time series
dta = lagmat2ds(x, mxlg, trim='both')
dta = np.delete(dta, -1, axis=1)  # drop the unlagged second series

# add constant
if addconst:
    dtaown = add_constant(dta[:, 1:(mxlg + 1)], prepend=False)
    dtajoint = add_constant(dta[:, 1:], prepend=False)
else:
    raise NotImplementedError('addconst=False is not implemented')

# run OLS on both models, without and with lags of the second variable
res2down = OLS(dta[:, 0], dtaown).fit()
res2djoint = OLS(dta[:, 0], dtajoint).fit()

# for the ssr-based tests see:
# http://support.sas.com/rnd/app/examples/ets/granger/index.htm
# the other tests are made-up

# Granger causality test using ssr (F statistic)
fgc1 = ((res2down.ssr - res2djoint.ssr) /
        res2djoint.ssr / mxlg * res2djoint.df_resid)
if verbose:
    print('ssr based F test:         F=%-8.4f, p=%-8.4f, df_denom=%d,'
          ' df_num=%d' % (fgc1,
                          stats.f.sf(fgc1, mxlg, res2djoint.df_resid),
                          res2djoint.df_resid, mxlg))
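
This snippet is adapted from statsmodels’ own grangercausalitytests. Unless you need the modified internals, the public function can be called directly; a minimal sketch on synthetic data (shapes and maxlag are illustrative):

import numpy as np
from statsmodels.tsa.stattools import grangercausalitytests

# Synthetic example: the second column leads the first by one period.
rng = np.random.default_rng(2)
x2 = rng.normal(size=500)
x1 = np.r_[0.0, x2[:-1]] + 0.1 * rng.normal(size=500)
data = np.column_stack((x1, x2))

# Tests whether the second column Granger-causes the first, for lags 1..maxlag.
res = grangercausalitytests(data, maxlag=3)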

github statsmodels / statsmodels / statsmodels / graphics / regressionplots.py
RHS_isempty = False
if isinstance(RHS, np.ndarray) and RHS.size == 0:
    RHS_isempty = True
elif isinstance(RHS, pd.DataFrame) and RHS.empty:
    RHS_isempty = True
if isinstance(exog_i, str):
    exog_i = dmatrix(exog_i + "-1", data)

# from here on, everything is an array or pandas-like

if RHS_isempty:
    # nothing to partial out: plot endog against exog_i directly
    ax.plot(endog, exog_i, 'o', **kwargs)
    fitted_line = OLS(endog, exog_i).fit()
    x_axis_endog_name = 'x' if isinstance(exog_i, np.ndarray) else exog_i.name
    y_axis_endog_name = ('y' if isinstance(endog, np.ndarray)
                         else endog.design_info.column_names[0])
else:
    # regress endog and exog_i on the other regressors, then plot the residuals
    res_yaxis = OLS(endog, RHS).fit()
    res_xaxis = OLS(exog_i, RHS).fit()
    xaxis_resid = res_xaxis.resid
    yaxis_resid = res_yaxis.resid
    x_axis_endog_name = res_xaxis.model.endog_names
    y_axis_endog_name = res_yaxis.model.endog_names
    ax.plot(xaxis_resid, yaxis_resid, 'o', **kwargs)
    fitted_line = OLS(yaxis_resid, xaxis_resid).fit()

fig = abline_plot(0, fitted_line.params[0], color='k', ax=ax)

if x_axis_endog_name == 'y':  # with no names the regression just gets 'y',
    x_axis_endog_name = 'x'   # which is misleading on the x axis, so use 'x'
ax.set_xlabel("e(%s | X)" % x_axis_endog_name)
ax.set_ylabel("e(%s | X)" % y_axis_endog_name)
ax.set_title('Partial Regression Plot', **title_kwargs)
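
The code above is the core of statsmodels’ plot_partregress. A minimal sketch of calling the public function instead (the DataFrame and column names are illustrative):

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm

# Illustrative synthetic data; column names are arbitrary.
rng = np.random.default_rng(1)
df = pd.DataFrame({'x1': rng.normal(size=200), 'x2': rng.normal(size=200)})
df['y'] = 1.0 + 2.0 * df['x1'] - 0.5 * df['x2'] + rng.normal(size=200)

# Partial regression of y on x1, controlling for x2.
fig = sm.graphics.plot_partregress('y', 'x1', ['x2'], data=df, obs_labels=False)
plt.show()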

github statsmodels / statsmodels / statsmodels / regression / feasible_gls.py
import collections

self.history = collections.defaultdict(list)  # keep a trace of each iteration
res_resid = None  # if maxiter < 2, no updating takes place
for i in range(maxiter):
    # pinv_wexog is cached, so drop it to force fit() to recompute
    if hasattr(self, 'pinv_wexog'):
        del self.pinv_wexog
    results = self.fit()
    self.history['self_params'].append(results.params)
    if i < maxiter - 1:  # skip the update on the last iteration
        self.results_old = results  # kept for debugging
        # estimate the heteroscedasticity from the squared residuals
        res_resid = OLS(self.link(results.resid**2), self.exog_var).fit()
        self.history['ols_params'].append(res_resid.params)
        # update the weights from the fitted variance function
        self.weights = 1. / self.linkinv(res_resid.fittedvalues)
        self.weights /= self.weights.max()  # normalization, not required
        self.weights[self.weights < 1e-14] = 1e-14  # clip to avoid zero weights
        self.initialize()

# note: results is the wrapper, results._results is the results instance
results._results.results_residual_regression = res_resid
return results
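
This loop is GLSHet.iterative_fit from statsmodels’ feasible_gls module. The same two-step idea can be sketched with the public OLS/WLS API alone (all names and the log variance model below are illustrative assumptions, not the module’s exact method):

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(3)
n = 500
x = np.linspace(0, 10, n)
X = sm.add_constant(x)
sigma = 0.5 + 0.3 * x                        # heteroscedastic noise
y = 1.0 + 2.0 * x + sigma * rng.normal(size=n)

# step 1: OLS, then model the (log) squared residuals as a function of x
res_ols = sm.OLS(y, X).fit()
res_var = sm.OLS(np.log(res_ols.resid**2), X).fit()

# step 2: refit with weights proportional to the inverse fitted variance
weights = 1.0 / np.exp(res_var.fittedvalues)
res_fgls = sm.WLS(y, X, weights=weights).fit()
print(res_fgls.params, res_fgls.bse)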

github statsmodels / statsmodels / statsmodels / sandbox / stats / diagnostic.py
xdiff = np.diff(x)

xdall = lagmat(x[:, None], maxlag, trim='both')
nobs = xdall.shape[0]
xdall = np.c_[np.ones((nobs, 1)), xdall]
xshort = x[-nobs:]

if store:
    resstore = ResultsStore()

if autolag:
    # search for the lag length with the best (lowest) information criterion
    # Note: the same number of observations is used for every candidate lag
    # so that the information criteria are comparable
    results = {}
    for mlag in range(1, maxlag + 1):
        results[mlag] = OLS(xshort, xdall[:, :mlag + 1]).fit()

    if autolag.lower() == 'aic':
        icbest, icbestlag = min((v.aic, k) for k, v in results.items())
    elif autolag.lower() == 'bic':
        icbest, icbestlag = min((v.bic, k) for k, v in results.items())
    else:
        raise ValueError("autolag can only be None, 'AIC' or 'BIC'")

    # rerun OLS with the lag chosen by the information criterion
    xdall = lagmat(x[:, None], icbestlag, trim='both')
    nobs = xdall.shape[0]
    xdall = np.c_[np.ones((nobs, 1)), xdall]
    xshort = x[-nobs:]
    usedlag = icbestlag
    if regresults:
        resstore.results = results
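
The autolag search above fits one OLS per candidate lag on the same observations and keeps the lag with the lowest information criterion. A standalone sketch of that selection pattern (the series and maxlag are illustrative):

import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.tsatools import lagmat

rng = np.random.default_rng(4)
x = rng.normal(size=300).cumsum()

maxlag = 8
xdall = lagmat(x[:, None], maxlag, trim='both')
nobs = xdall.shape[0]
xdall = np.c_[np.ones((nobs, 1)), xdall]
xshort = x[-nobs:]

# fit on the same observations for every lag so the AICs are comparable
fits = {mlag: sm.OLS(xshort, xdall[:, :mlag + 1]).fit()
        for mlag in range(1, maxlag + 1)}
best_aic, best_lag = min((res.aic, mlag) for mlag, res in fits.items())
print(best_lag, best_aic)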

github statsmodels / statsmodels / statsmodels / graphics / regressionplots.py
>>> sm.graphics.plot_ccpr(results, 'single')
>>> plt.show()

.. plot:: plots/graphics_regression_ccpr.py
"""
fig, ax = utils.create_mpl_ax(ax)

exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)
results = maybe_unwrap_results(results)

x1 = results.model.exog[:, exog_idx]
x1beta = x1 * results.params[exog_idx]
ax.plot(x1, x1beta + results.resid, 'o')

# fit a reference line through the component-plus-residual points
from statsmodels.tools.tools import add_constant
mod = OLS(x1beta, add_constant(x1)).fit()
params = mod.params
fig = abline_plot(*params, ax=ax)

ax.set_title('Component and component plus residual plot')
ax.set_ylabel("Residual + %s*beta_%d" % (exog_name, exog_idx))
ax.set_xlabel("%s" % exog_name)

return fig
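
A short sketch of calling plot_ccpr on a fitted results instance (the data and column names are illustrative):

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm

rng = np.random.default_rng(5)
df = pd.DataFrame({'x1': rng.normal(size=150), 'x2': rng.normal(size=150)})
df['y'] = 1.0 + 1.5 * df['x1'] + 0.8 * df['x2'] + rng.normal(size=150)

results = sm.OLS(df['y'], sm.add_constant(df[['x1', 'x2']])).fit()
fig = sm.graphics.plot_ccpr(results, 'x1')
plt.show()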

github statsmodels / statsmodels / statsmodels / tsa / stattools.py
                          regresults=regresults)
        resstore.autolag_results = alres

    bestlag -= startlag  # convert from column index back to a lag length

    # rerun OLS with the lag chosen by autolag
    xdall = lagmat(xdiff[:, None], bestlag, trim='both', original='in')
    nobs = xdall.shape[0]
    xdall[:, 0] = x[-nobs - 1:-1]  # replace column 0 (xdiff) with the lagged level of x
    xdshort = xdiff[-nobs:]
    usedlag = bestlag
else:
    usedlag = maxlag
    icbest = None

if regression != 'nc':
    resols = OLS(xdshort,
                 add_trend(xdall[:, :usedlag + 1], regression)).fit()
else:
    resols = OLS(xdshort, xdall[:, :usedlag + 1]).fit()

adfstat = resols.tvalues[0]
# an alternative: adfstat = (resols.params[0] - 1.0) / resols.bse[0]
# the "asymptotically correct" z statistic would be
#   nobs / (1 - np.sum(resols.params[1:-(trendorder + 1)])) * (resols.params[0] - 1)
# and is used for series integrated of order higher than I(1),
# i.e. for cointegration tests rather than the ADF test itself

# get the approximate p-value and the critical values
pvalue = mackinnonp(adfstat, regression=regression, N=1)
critvalues = mackinnoncrit(N=1, regression=regression, nobs=nobs)
critvalues = {"1%": critvalues[0], "5%": critvalues[1],
              "10%": critvalues[2]}

github statsmodels / statsmodels / statsmodels / examples / ex_feasible_gls_het.py
nsample = 1000
sig = 0.5
x1 = np.linspace(0, 20, nsample)
X = np.c_[x1, (x1 - 5)**2, np.ones(nsample)]
np.random.seed(0)
beta = [0.5, -0.015, 1.]
y_true2 = np.dot(X, beta)
w = np.ones(nsample)
w[nsample * 6 // 10:] = 4  # note: this is the squared value
y2 = y_true2 + sig * np.sqrt(w) * np.random.normal(size=nsample)
X2 = X[:, [0, 2]]  # a reduced design, immediately replaced below
X2 = X

res_ols = OLS(y2, X2).fit()
print('OLS beta estimates')
print(res_ols.params)
print('OLS stddev of beta')
print(res_ols.bse)
print('\nWLS')
mod0 = GLSHet2(y2, X2, exog_var=w)
res0 = mod0.fit()
print('new version')
mod1 = GLSHet(y2, X2, exog_var=w)
res1 = mod1.iterative_fit(2)
print('WLS beta estimates')
print(res1.params)
print(res0.params)
print('WLS stddev of beta')
print(res1.bse)
# compare with the previous version GLSHet2 as a refactoring check
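
Since the true variance structure w is known in this example script, the feasible-GLS estimates can also be checked against WLS with the true weights. A short sketch that assumes the y2, X2, and w arrays defined above:

import statsmodels.api as sm

# with the true variance known, WLS with weights 1/w is the benchmark
res_wls = sm.WLS(y2, X2, weights=1. / w).fit()
print('WLS (true weights) beta estimates')
print(res_wls.params)
print(res_wls.bse)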

github statsmodels / statsmodels / statsmodels / stats / _diagnostic_other.py
if x2 is not None:
    dm_excl = dm(x2, lin_pred)
    if mean_deriv is not None:
        # allow both and stack them
        dm_excl = np.column_stack((dm_excl, mean_deriv))
elif mean_deriv is not None:
    dm_excl = mean_deriv
else:
    raise ValueError('either exog_extra or mean_deriv has to be provided')

# TODO: check for rank deficiency or redundancy; note that OLS computes the rank
k_constraint = dm_excl.shape[1]
fittedvalues = res.predict()  # discrete models have linpred instead of mean
v = var_func(fittedvalues)
std = np.sqrt(v)
res_ols1 = OLS(res.resid_response / std,
               np.column_stack((dm_incl, dm_excl)) / std[:, None]).fit()

# case: nonrobust, assumes the variance implied by the distribution is correct
c1 = res_ols1.ess
pval1 = stats.chi2.sf(c1, k_constraint)

# case: robust to dispersion
c2 = nobs * res_ols1.rsquared
pval2 = stats.chi2.sf(c2, k_constraint)

# case: robust to heteroscedasticity
from statsmodels.stats.multivariate_tools import partial_project
pp = partial_project(dm_excl / std[:, None], dm_incl / std[:, None])
resid_p = res.resid_response / std
res_ols3 = OLS(np.ones(nobs), pp.resid * resid_p[:, None]).fit()
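
The auxiliary regressions above build score/LM-type tests by hand, for example comparing nobs * R-squared from the auxiliary OLS against a chi-squared distribution. For the heteroscedasticity special case, statsmodels exposes the same auxiliary-regression idea as a public function; a minimal sketch:

import numpy as np
import statsmodels.api as sm
from statsmodels.stats.diagnostic import het_breuschpagan

rng = np.random.default_rng(7)
x = rng.normal(size=300)
X = sm.add_constant(x)
y = 1.0 + 2.0 * x + np.abs(x) * rng.normal(size=300)  # variance grows with |x|

res = sm.OLS(y, X).fit()
# the LM statistic is nobs * R^2 of the auxiliary regression of resid^2 on X
lm, lm_pvalue, fstat, f_pvalue = het_breuschpagan(res.resid, X)
print('LM = %0.3f, p = %0.4f' % (lm, lm_pvalue))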