Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def Nagelkerke_Rsquare(self, columns):
    """Return Nagelkerke's pseudo R-squared for a logit fitted on ``columns``.

    The model regresses ``self.data[self.target]`` on the given columns plus
    the ``'intercept'`` column, which must already exist in ``self.data``.

    :param columns: list of predictor column names (not modified; a copy is
        extended with ``'intercept'``).
    :return: Nagelkerke R-squared, i.e. the Cox-Snell statistic
        ``1 - (L_null / L_model) ** (2 / N)`` divided by its upper bound
        ``1 - L_null ** (2 / N)``.
    """
    cols = columns.copy()
    cols.append('intercept')
    log_clf = sm.Logit(self.data[self.target], self.data[cols])
    n_obs = self.data.shape[0]
    try:
        result = log_clf.fit(disp=0)
    except Exception:
        # The default optimizer can fail (for instance on an ill-conditioned
        # problem); retry with the derivative-free Powell method.
        result = log_clf.fit(disp=0, method='powell')
    # Work with log-likelihoods directly: exponentiating them first underflows
    # to 0.0 for all but tiny samples and turns the ratio into NaN.
    # 2.0 keeps the exponent a float even under Python 2 integer division.
    scale = 2.0 / n_obs
    # -expm1(x) == 1 - exp(x), evaluated accurately when x is close to zero.
    cox_snell = -np.expm1(scale * (result.llnull - result.llf))
    cox_snell_max = -np.expm1(scale * result.llnull)
    return cox_snell / cox_snell_max
def McFadden_RSquare(self, columns):
    """Return McFadden's pseudo R-squared for a logit fitted on ``columns``.

    The model regresses ``self.data[self.target]`` on the given columns plus
    the ``'intercept'`` column, which must already exist in ``self.data``.

    :param columns: list of predictor column names (not modified; a copy is
        extended with ``'intercept'``).
    :return: the ``prsquared`` attribute of the fitted results.
    """
    cols = columns.copy()
    cols.append('intercept')
    log_clf = sm.Logit(self.data[self.target], self.data[cols])
    try:
        result = log_clf.fit(disp=0)
    except Exception:
        # The default optimizer can fail (for instance on an ill-conditioned
        # problem); retry with the derivative-free Powell method.  Catching
        # Exception rather than everything lets Ctrl-C / SystemExit through.
        result = log_clf.fit(disp=0, method='powell')
    return result.prsquared
# Load the raw rows, then clean them in place with the project's `norm`
# helpers (their exact semantics are defined outside this snippet).
data_file_work = norm.load_csv(dataFile)
norm.delete_unknowns(data_file_work)
norm.col_delete(data_file_work, 0)
norm.col_replace(data_file_work, 9, 4, 1, 0)
for i in range(9):
    norm.make_col_numeric(data_file_work, i)

# Wrap the cleaned rows in a DataFrame and label its ten columns.
df = pd.DataFrame(data_file_work)
df.columns = [
    "clump_thickness",
    "size_uniformity",
    "shape_uniformity",
    "marginal_adhesion",
    "epithelial_size",
    "bare_nucleoli",
    "bland_chromatin",
    "normal_nucleoli",
    "mitoses",
    "class",
]
# The first nine columns are the predictors; "class" is the response.
train_cols = df.columns[:9]

# Logistic regression of "class" on the predictors.  No constant column is
# added, so the model is fitted without an intercept.
logit = sm.Logit(df['class'], df[train_cols])
result = logit.fit()

# Report the fitted coefficients and diagnostics.
print(result.summary())
Run logistic regression model to predict whether a signed up driver ever actually drove.
:param input_df: Data frame prepared for statsmodels regression
:type input_df: pd.DataFrame
:return: AUC for model generated
:rtype: float
"""
# Log the model formula and summary statistics of both splits before fitting.
# NOTE(review): this fragment uses Python 2 print statements (see below).
logging.info('Running model w/ description: %s' %model_description)
logging.debug('Train df: \n%s' % train.describe())
logging.debug('Test df: \n%s' % test.describe())
# Expand the formula into response / design DataFrames for each split;
# NA_action='drop' discards rows with missing values.
# NOTE(review): train and test are expanded independently, so their design
# columns could differ if categorical levels differ between splits - confirm.
y_train, X_train = dmatrices(model_description, data=train, return_type='dataframe', NA_action='drop')
y_test, X_test = dmatrices(model_description, data=test, return_type='dataframe', NA_action='drop')
# Create the logit model and fit it with BFGS, capped at 100 iterations
mod = sm.Logit(endog=y_train, exog=X_train)
res = mod.fit(method='bfgs', maxiter=100)
# Print value counts of two training columns, then the model summary
print train['city_name'].value_counts()
print train['signup_channel'].value_counts()
print res.summary()
# Predict on the test design matrix and score the predictions with ROC AUC
predicted = res.predict(X_test)
auc = roc_auc_score(y_true=y_test, y_score=predicted)
print 'AUC for 20%% holdout: %s' %auc
# Return AUC for model generated
return auc
x2 = []
y = []
# Each line holds comma-separated fields: two float features followed by an
# integer label.  (`x1` and `lines` are created before this fragment.)
for line in lines:
    line = line.replace("\n", "")
    vals = line.split(",")
    x1.append(float(vals[0]))
    x2.append(float(vals[1]))
    y.append(int(vals[2]))

# Convert the accumulated lists to arrays and stack the two features as the
# columns of the design matrix.
x1, x2, y = np.array(x1), np.array(x2), np.array(y)
x = np.vstack([x1, x2]).T

# Fit the logit (no constant column is added) and inspect it.
logit = sm.Logit(y, x)
result = logit.fit()
result.params
result.predict([45.0, 85.0])
# Build X, Y from 2nd file
# Read every line up front inside a `with` block so the file handle is closed
# as soon as the data is in memory instead of being leaked.
with open('ex2data2.txt') as f:
    lines = f.readlines()
x1 = []
x2 = []
y = []
# The first two comma-separated fields of each line are the float features.
for line in lines:
    line = line.replace("\n", "")
    vals = line.split(",")
    x1.append(float(vals[0]))
    x2.append(float(vals[1]))
def run_logistic_regression(df):
    """Fit a logit of ``is_conversion`` on ``pageviews_cumsum`` plus a constant.

    Prints the summary of the fitted model and returns the results object.
    """
    # Single predictor with a constant column added for the intercept.
    design = sm.add_constant(df['pageviews_cumsum'])
    fitted = sm.Logit(df['is_conversion'], design).fit()
    print(fitted.summary())
    return fitted
def baseModel(data):
    """Original (baseline) model.

    Fits a formula-based logit of ``label_code`` on four numeric columns of
    ``data`` and returns the fitted results.
    """
    baseline = sm.Logit.from_formula(
        "label_code ~ education_num + capital_gain + capital_loss + hours_per_week",
        data=data,
    )
    return baseline.fit()
# Validation features as a float array.
X_val = val_split[feature_set].values.astype(float)
# Standardise both splits using statistics computed on X_train only.
# NOTE(review): a constant training column gives std == 0 and a division by
# zero below - confirm the feature set excludes such columns.
mean = np.mean(X_train, 0)
std = np.std(X_train, 0)
X_train -= mean
X_train /= std
X_val -= mean
X_val /= std
# Write the standardised values back into the split frames.
train_split[feature_set] = X_train
val_split[feature_set] = X_val
# Without regularization: statsmodels Logit with an explicit constant column.
# With regularization: scikit-learn LogisticRegression using strength C.
if not regularization:
model = sm.Logit(train_split.Progressor.values, sm.add_constant(X_train))
clf = model.fit(disp=0)
p_val = clf.predict(sm.add_constant(X_val)).flatten().tolist()
else:
clf = LogisticRegression(C=C, random_state=seed, solver='lbfgs')
clf.fit(X_train, train_split.Progressor.values)
# Column 1 of predict_proba is kept as the validation score.
p_val = clf.predict_proba(X_val)[:, 1].flatten().tolist()
# Record this split's scaling statistics, predictions, labels and model.
folds_means_stds.append([mean, std])
folds_predicts.extend(p_val)
folds_gt.extend(val_split.Progressor.values.flatten().tolist())
folds_models.append(clf)
# Score the accumulated predictions against the accumulated labels, then
# store the score, models and scaling statistics.
# NOTE(review): presumably this runs once per CV repetition, after the fold
# loop - the loop headers are outside this fragment, confirm.
auc = metric(folds_gt, folds_predicts)
cv_scores.append(auc)
models.append(folds_models)
means_stds.append(folds_means_stds)