How to use the statsmodels.api.OLS function in statsmodels

To help you get started, we’ve selected a few statsmodels examples based on popular ways `statsmodels.api.OLS` is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pandas-dev / pandas / pandas / stats / ols.py View on Github external
self._nw_lags = nw_lags
        self._nw_overlap = nw_overlap

        (self._y, self._x, self._weights, self._x_filtered,
         self._index, self._time_has_obs) = self._prepare_data()

        if self._weights is not None:
            self._x_trans = self._x.mul(np.sqrt(self._weights), axis=0)
            self._y_trans = self._y * np.sqrt(self._weights)
            self.sm_ols = sm.WLS(self._y.get_values(),
                                 self._x.get_values(),
                                 weights=self._weights.values).fit()
        else:
            self._x_trans = self._x
            self._y_trans = self._y
            self.sm_ols = sm.OLS(self._y.get_values(),
                                 self._x.get_values()).fit()
github statsmodels / statsmodels / docs / source / plots / graphics_gofplots_qqplot.py View on Github external
# -*- coding: utf-8 -*-
"""
Created on Sun May 06 05:32:15 2012

Author: Josef Perktold
edited by: Paul Hobson (2012-08-19)
"""
from scipy import stats
from matplotlib import pyplot as plt
import statsmodels.api as sm

# Example from docstring: fit OLS on the Longley data (with a prepended
# constant column) and keep the residuals for the Q-Q plots below.
data = sm.datasets.longley.load(as_pandas=False)
data.exog = sm.add_constant(data.exog, prepend=True)
mod_fit = sm.OLS(data.endog, data.exog).fit()
res = mod_fit.resid

left = -1.8   # x coordinate for the text label inserted in each subplot

fig = plt.figure()

# Subplot 1 of the 2x2 grid: qqplot called with no extra keywords.
ax = fig.add_subplot(2, 2, 1)
sm.graphics.qqplot(res, ax=ax)
top = ax.get_ylim()[1] * 0.75  # label sits at 75% of the upper y-limit
txt = ax.text(left, top, 'no keywords', verticalalignment='top')
txt.set_bbox(dict(facecolor='k', alpha=0.1))  # faint black box behind label

# Subplot 2: same residuals, qqplot called with line='s'.
ax = fig.add_subplot(2, 2, 2)
sm.graphics.qqplot(res, line='s', ax=ax)
top = ax.get_ylim()[1] * 0.75
txt = ax.text(left, top, "line='s'", verticalalignment='top')
github bartchr808 / Quantopian_Pairs_Trader / algo.py View on Github external
def hedge_ratio(Y, X):
    """Estimate the hedge ratio as the OLS slope of ``Y`` on ``X``.

    A constant column is added to ``X`` with ``sm.add_constant`` and ``Y``
    is regressed on the result with ``sm.OLS``.

    Parameters
    ----------
    Y : array-like
        Dependent series passed to ``sm.OLS`` as ``endog``.
    X : array-like
        Explanatory series; a constant column is added before fitting.

    Returns
    -------
    The fitted coefficient at position 1 of the parameter vector, i.e. the
    entry that follows the added constant.
    """
    # Look into using Kalman Filter to calculate the hedge ratio
    X = sm.add_constant(X)
    params = sm.OLS(Y, X).fit().params
    # For pandas inputs ``params`` is a label-indexed Series, and integer
    # ``[]`` on such a Series relies on a deprecated positional fallback.
    # Select by position explicitly when ``.iloc`` exists; plain arrays
    # keep using ordinary indexing.
    return getattr(params, "iloc", params)[1]
github statsmodels / statsmodels / statsmodels / examples / ex_pandas.py View on Github external
import matplotlib as mpl

from pandas import DataFrame, Series

import statsmodels.api as sm
import statsmodels.tsa.api as tsa
from statsmodels.tsa.arima_process import arma_generate_sample


# Load the stackloss dataset and wrap its arrays in pandas objects.
data = sm.datasets.stackloss.load(as_pandas=False)
X = DataFrame(data.exog, columns=data.exog_name)
X['intercept'] = 1.  # constant column so the fit includes an intercept
Y = Series(data.endog)

# Example: OLS -- fit, then print the summary, coefficients and their
# covariance matrix.
model = sm.OLS(Y, X)
results = model.fit()
print(results.summary())

print(results.params)
print(results.cov_params())

# Influence diagnostics for the OLS fit, printed as a table.
infl = results.get_influence()
print(infl.summary_table())

#raise

# Example: RLM fitted with the HuberT norm on the same Y and X.
huber_t = sm.RLM(Y, X, M=sm.robust.norms.HuberT())
hub_results = huber_t.fit()
print(hub_results.params)
print(hub_results.bcov_scaled)
github pgmpy / pgmpy / pgmpy / estimators / SEMEstimator.py View on Github external
civs = [civ for civ in civs if civ[0] not in ivs]

        reg_covars = []
        for var in self.model.graph.predecessors(X):
            if var in self.model.observed:
                reg_covars.append(var)

        # Get CIV conditionals
        civ_conditionals = []
        for civ in civs:
            civ_conditionals.extend(civ[1])

        # First stage regression.
        params = (
            sm.OLS(data.loc[:, X], data.loc[:, reg_covars + civ_conditionals])
            .fit()
            .params
        )

        data["X_pred"] = np.zeros(data.shape[0])
        for var in reg_covars:
            data.X_pred += params[var] * data.loc[:, var]

        summary = sm.OLS(
            data.loc[:, Y], data.loc[:, ["X_pred"] + civ_conditionals]
        ).fit()
        return summary.params["X_pred"], summary
github je-suis-tm / quant-trading / Oil Money project / Oil Money CAD.py View on Github external
'gold',
 'jpy',
 'cad'])


# In[6]:

#create r squared bar charts

# NOTE(review): `var` is the dict returned by locals(); the loop below stores
# each r squared in it under the column's name, and the plotting code reads
# the values back with var[str(i)]. At module level this presumably writes
# into the module namespace -- TODO confirm; a dedicated dict would be safer.
var=locals()

# Regress 'cad' on every other column (plus a constant) and record the
# r squared of each single-regressor fit.
for i in df.columns:
    if i!='cad':
            x=sm.add_constant(df[i])
            y=df['cad']
            m=sm.OLS(y,x).fit()
            var[str(i)]=m.rsquared
     
# Single-axes 10x5 figure with the top and right spines hidden.
ax=plt.figure(figsize=(10,5)).add_subplot(111)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Bar width and a list of twelve hex colours -- presumably one per bar;
# TODO confirm against the plotting loop that follows.
width=0.7
colorlist=['#9499a6','#9499a6','#9499a6','#9499a6',
           '#9499a6','#9499a6','#9499a6','#582a20',
           '#be7052','#f2c083','#9499a6','#9499a6']

# List copy of the column labels so temp.index(i) can give each bar's position.
temp=list(df.columns)
for i in temp:
    if i!='cad':
        plt.bar(temp.index(i)+width,            
            var[str(i)],width=width,label=i,
github statsmodels / statsmodels / statsmodels / regression / process_regression.py View on Github external
def _get_start(self):
        """Build the starting parameter vector.

        The leading entries are the coefficients of an OLS fit of
        ``self.endog`` on ``self.exog``; they are followed by one zero for
        every column of ``self.exog_scale``, ``self.exog_smooth`` and
        ``self.exog_noise``.
        """
        # OLS supplies the starting values for the mean structure parameters.
        mean_start = sm.OLS(self.endog, self.exog).fit().params

        # Every remaining parameter starts at zero.
        extra_blocks = (self.exog_scale, self.exog_smooth, self.exog_noise)
        n_extra = sum(block.shape[1] for block in extra_blocks)

        return np.concatenate((mean_start, np.zeros(n_extra)))
github statsmodels / statsmodels / examples / example_gls.py View on Github external
"""Generalized Least Squares
"""

import statsmodels.api as sm
# Load the Longley data and add a constant column to the regressors.
data = sm.datasets.longley.load()
data.exog = sm.add_constant(data.exog)

# The Longley dataset is a time series dataset.
# Let's assume that the data is heteroskedastic and that we know
# the nature of the heteroskedasticity.  We can then define
# `sigma` and use it to give us a GLS model.

# First we will obtain the residuals from an OLS fit.

ols_resid = sm.OLS(data.endog, data.exog).fit().resid

# Assume that the error terms follow an AR(1) process with a trend
# resid[i] = beta_0 + rho*resid[i-1] + e[i]
# where e ~ N(0,some_sigma**2)
# and that rho is simply the correlation of the residuals.
# A consistent estimator for rho is to regress the residuals
# on the lagged residuals.

# NOTE(review): if `load()` returns pandas objects, the two slices below carry
# different index labels -- confirm that OLS accepts them as-is.
resid_fit = sm.OLS(ols_resid[1:], sm.add_constant(ols_resid[:-1])).fit()
# print() calls with a single argument behave the same on Python 2 and 3;
# the former print statements were a SyntaxError on Python 3.
print(resid_fit.tvalues[1])
print(resid_fit.pvalues[1])
# While we don't have strong evidence that the errors follow an AR(1)
# process we continue

# Coefficient on the lagged residual (position 1, after the constant).
rho = resid_fit.params[1]
github nyoka-pmml / nyoka / examples / statsmodels / exponential_smoothing / stats_models / examples / incomplete / wls_extended.py View on Github external
"""
Weighted Least Squares

example is extended to look at the meaning of rsquared in WLS,
at outliers, compares with RLM and a short bootstrap

"""
from __future__ import print_function
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

# Fit plain OLS on the ccard data; prepend=False asks add_constant to put the
# constant after the existing columns.
data = sm.datasets.ccard.load()
data.exog = sm.add_constant(data.exog, prepend=False)
ols_fit = sm.OLS(data.endog, data.exog).fit()

# perhaps the residuals from this fit depend on the square of income
# NOTE(review): column 2 is presumably the income-squared regressor -- confirm
# against the dataset's column order.
incomesq = data.exog[:,2]
plt.scatter(incomesq, ols_fit.resid)
#@savefig wls_resid_check.png
plt.grid()


# If we think that the variance is proportional to income**2
# we would want to weight the regression by income.
# The weights argument in WLS weights the regression by its square root,
# and since income enters the equation, if we have income/income
# it becomes the constant, so we would want to perform
# this type of regression without an explicit constant in the design.

#..data.exog = data.exog[:,:-1]
github LinkageIO / Camoco / camoco / COB.py View on Github external
else:
            genes_list = self.refgen.bootstrap_candidate_genes(
                locus_list, flank_limit=flank_limit, chain=True,
                include_parent_locus=True
            )
        self.log("Found {} candidate genes", len(genes_list))
        # Get global and local degree for candidates
        gdegree = self.global_degree(genes_list, trans_locus_only=True)
        ldegree = self.local_degree(genes_list, trans_locus_only=True)
        # Merge the columns
        degree = ldegree.merge(gdegree,left_index=True,right_index=True)
        degree.columns = ['local', 'global']
        degree = degree.sort_values(by='global')
        if include_regression:
            # Add the regression lines
            ols = sm.OLS(degree['local'], degree['global']).fit()
            degree['resid'] = ols.resid
            degree['fitted'] = ols.fittedvalues
            degree = degree.sort_values(by='resid',ascending=False)
        if iter_name is not None:
            degree['iter_name'] = iter_name
        return degree