How to use the statsmodels.api.GLM function in statsmodels

To help you get started, we’ve selected a few statsmodels examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github SPFlow / SPFlow / src / spn / structure / leaves / conditional / MLE.py View on Github external
assert dataOut.shape[1] == 1, 'more than one output variable in scope?'

    if dataOut.shape[0] == 0:
        return

    if isinstance(node, Conditional_Gaussian):
        family = sm.families.Gaussian()
    elif isinstance(node, Conditional_Poisson):
        family = sm.families.Poisson()
    elif isinstance(node, Conditional_Bernoulli):
        family = sm.families.Binomial()
    else:
        raise Exception("Unknown conditional " + str(type(node)))

    dataIn = np.c_[dataIn, np.ones((dataIn.shape[0]))]
    node.weights = sm.GLM(dataOut, dataIn, family=family).fit().params
github mwaskom / seaborn / seaborn / plotobjs.py View on Github external
# Plot once to let matplotlib sort out the axis limits
    ax.plot(x1, x2, "o", **scatter_kws)

    # Find the plot limits
    x1min, x1max = ax.get_xlim()
    x2min, x2max = ax.get_ylim()

    # Make the grid for the contour plot
    x1_points = np.linspace(x1min, x1max, 100)
    x2_points = np.linspace(x2min, x2max, 100)
    xx1, xx2 = np.meshgrid(x1_points, x2_points)

    # Fit the model with an interaction
    X = np.c_[np.ones(x1.size), x1, x2, x1 * x2]
    if logistic:
        lm = sm.GLM(y, X, family=sm.families.Binomial()).fit()
    else:
        lm = sm.OLS(y, X).fit()

    # Evaluate the model on the grid
    eval = np.vectorize(lambda x1_, x2_: lm.predict([1, x1_, x2_, x1_ * x2_]))
    yhat = eval(xx1, xx2)

    # Draw the contour plot
    vmin = contour_kws.pop("vmin", min(y.min(), yhat.min()))
    vmax = contour_kws.pop("vmax", max(y.max(), yhat.max()))
    c = ax.contourf(xx1, xx2, yhat, levels, cmap=cmap,
                    vmin=vmin, vmax=vmax, **contour_kws)

    # Draw a colorbar, maybe
    if colorbar:
        bar = plt.colorbar(c)
github pysal / pysal / pysal / contrib / spint / count_base.py View on Github external
method              : string
                            estimation method for GLM framework; default is
                            itertaively weighted least sqaures (iwls)
                            "iwls" | "TODO - add other methods"

       Returns
       -------
       betas                : array
                              K+L+D+1 x 1; estimated parameters for
                              origin/destination/cost variables and constant
        '''
        if (framework.lower() == 'sm_glm'):
            
            if self.constant:
                self.X = sm.add_constant(X)
            results = sm.GLM(self.y, self.X, family = families.Poisson()).fit() 
            self.params = results.params
            self.se = results.bse
            self.tvals = results.tvalues
            self.fitted = results.fittedvalues
            self.fit_stats = {}
            self.fit_stats['aic'] = results.aic

        elif (framework.lower() == 'glm'):
            model = GLM(self.y, self.X, family = families.Poisson(),
                    constant=self.constant).fit()
           
            self.params = model.params
            self.aic = model.aic
            self.bic = model.bic
            self.deviance = model.deviance
            self.df_model = model.df_model
github mwaskom / seaborn / seaborn / plotobjs.py View on Github external
def _regress(x, y):
                        if logistic:
                            x_ = sm.add_constant(x, prepend=True)
                            fit = sm.GLM(y, x_,
                                         family=sm.families.Binomial()).fit()
                            reg = fit.predict(xx_)
                        else:
                            fit = np.polyfit(x, y, order)
                            reg = np.polyval(fit, xx)
                        return reg
github ratschlab / RiboDiff / src / rawdispersion.py View on Github external
explanatory = data.matrix
    librarySizes = np.hstack([data.libSizesRibo, data.libSizesRNA])

    for i in range(num):

        if i % 50 == 0:
            print '%i genes finished...' % i
        if i+1 == num:
            print '%i genes finished...' % num

        if sum(data.countRibo[i, :]) >= cntCutoff and sum(data.countRNA[i, :]) >= cntCutoff:
            response = np.hstack([data.countRibo[i, :], data.countRNA[i, :]])
            disper = 0.1
            for j in range(10):
                modNB  = sm.GLM(response, explanatory, family=sm.families.NegativeBinomial(alpha=disper), offset=np.log(librarySizes))
                #modNB = sm.NegativeBinomial(response, explanatory, loglike_method='nb2')
                result = modNB.fit()
                #print result.summary()
                #print result.mu
                disperBef = disper
                yhat = result.mu
                sign = -1.0
                res  = minimize_scalar(al.adj_loglikelihood, bounds=(0, 1), args=(explanatory, response, yhat, sign), tol=1e-5, method='Bounded')
                disper = res.x
                if abs(np.log(disper) - np.log(disperBef)) < 0.03:
                    break
            disperRaw[i] = disper

    data.disperRaw = disperRaw

    return data
github statsmodels / statsmodels / examples / python / glm.py View on Github external
data = sm.datasets.star98.load()
data.exog = sm.add_constant(data.exog, prepend=False)

#  The dependent variable is N by 2 (Success: NABOVE, Failure: NBELOW):

print(data.endog[:5, :])

#  The independent variables include all the other variables described
# above, as
#  well as the interaction terms:

print(data.exog[:2, :])

# ### Fit and summary

glm_binom = sm.GLM(data.endog, data.exog, family=sm.families.Binomial())
res = glm_binom.fit()
print(res.summary())

# ### Quantities of interest

print('Total number of trials:', data.endog[0].sum())
print('Parameters: ', res.params)
print('T-values: ', res.tvalues)

# First differences: We hold all explanatory variables constant at their
# means and manipulate the percentage of low income households to assess its
# impact on the response variables:

means = data.exog.mean(axis=0)
means25 = means.copy()
means25[0] = stats.scoreatpercentile(data.exog[:, 0], 25)
github statsmodels / statsmodels / examples / example_glm.py View on Github external
#Load data
#^^^^^^^^^
# In the example above, we printed the ``NOTE`` attribute to learn about the
# Star98 dataset. Statsmodels datasets ships with other useful information. For
# example:
print sm.datasets.scotland.DESCRLONG

# Load the data and add a constant to the exogenous variables:
data2 = sm.datasets.scotland.load()
data2.exog = sm.add_constant(data2.exog, prepend=False)
print data2.exog[:5, :]
print data2.endog[:5]

#Fit and summary
#^^^^^^^^^^^^^^^
glm_gamma = sm.GLM(data2.endog, data2.exog, family=sm.families.Gamma())
glm_results = glm_gamma.fit()
print glm_results.summary()

#GLM: Gaussian distribution with a noncanonical link
#---------------------------------------------------
#Artificial data
#^^^^^^^^^^^^^^^
nobs2 = 100
x = np.arange(nobs2)
np.random.seed(54321)
X = np.column_stack((x, x**2))
X = sm.add_constant(X, prepend=False)
lny = np.exp(-(.03 * x + .0001 * x**2 - 1.0)) + .001 * np.random.rand(nobs2)

#Fit and summary
#^^^^^^^^^^^^^^^
github ratschlab / RiboDiff / src / ribodiff / rawdisp.py View on Github external
sys.stdout.flush()

        if i % 50 == 0:
            print '\r%i genes finished...' % i ,
        if i+1 == num:
            print '\r%i genes finished.' % num

        if sum(data.countRibo[i, :] / data.libSizesRibo) >= cntCutoff and sum(data.countRna[i, :] / data.libSizesRna) >= cntCutoff:

            response = np.hstack([data.countRibo[i, :], data.countRna[i, :]])

            disp = opts.dispInitial

            for k in range(10):
                try:
                    modNB  = sm.GLM(response, explanatory, family=sm.families.NegativeBinomial(alpha=disp), offset=np.log(librarySizes))
                    result = modNB.fit()

                    dispBef = disp
                    yhat = result.mu
                    sign = -1.0
                    res = minimize_scalar(al.adj_loglikelihood_scalar, args=(explanatory, response, yhat, sign), method='Bounded', bounds=(0, 10.0), tol=1e-5)
                    disp = res.x

                    if abs(np.log(disp) - np.log(dispBef)) < 1e-4:
                        dispRaw[i] = disp
                        dispRawConv[i] = True
                        muRaw[i, :] = yhat
                        muRawRibo[i, :] = yhat[data.idxRibo]
                        muRawRna[i, :] = yhat[data.idxRna]
                        break
                    elif k == 9: