Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def Transform_KM_Features(training_data, training_labels, test_data, km_max=0):
    """Append a k-means derived ``cluster`` column to the train and test data.

    A ``KMeansFeaturizer`` is fitted on the training data and labels (with
    ``target_scale=0`` and a fixed ``random_state``) and then used to
    transform both data sets. The transformed output is appended to each data
    set as a column named ``'cluster'``.

    Args:
        training_data: Training features. Must expose ``.index`` and
            ``.shape`` and iterate over its column names (e.g. a pandas
            DataFrame).
        training_labels: Training target. Must expose ``.name`` and
            ``.values`` (e.g. a pandas Series); ``.name`` becomes the target
            column label in the returned training frame.
        test_data: Test features. It is labelled with the training column
            names, so it must have the same number of columns.
        km_max: Requested number of clusters. ``0`` (the default) derives it
            from the training row count as ``int(log10(n_rows) + 0.49)``.
            Any value of 2 or less is raised to 2.

    Returns:
        A ``(train_df, test_df)`` tuple of DataFrames that keep the original
        indexes. ``train_df`` holds the feature columns, the target column
        and ``'cluster'``; ``test_df`` holds the feature columns and
        ``'cluster'``.
    """
    seed = 99  # fixed so the clustering is reproducible between calls
    preds = list(training_data)
    target = training_labels.name
    train_index = training_data.index
    test_index = test_data.index

    if km_max == 0:
        # No explicit request: scale the cluster count with log10 of the
        # number of training rows.
        km_max = int(np.log10(training_data.shape[0]) + 0.49)
    # Never use fewer than two clusters. The count is a plain number, so it
    # is used directly instead of being deep-copied.
    k_max = 2 if km_max <= 2 else km_max

    kmf = KMeansFeaturizer(k=k_max, target_scale=0, random_state=seed)
    kmf_hint = kmf.fit(training_data, training_labels)
    # Both sets go through the featurizer fitted on the training data only.
    training_cluster_features = kmf_hint.transform(training_data)
    test_cluster_features = kmf_hint.transform(test_data)

    # np.c_ stacks everything column-wise into one array, so columns of
    # different dtypes end up sharing a common dtype in the result.
    # NOTE(review): the column labels below assume transform() returns exactly
    # one column per row -- confirm against KMeansFeaturizer.
    npx = np.c_[training_data, training_labels.values]
    training_with_cluster = np.c_[npx, training_cluster_features]
    test_with_cluster = np.c_[test_data, test_cluster_features]

    train_with_cluster_df = pd.DataFrame(
        training_with_cluster,
        index=train_index,
        columns=preds + [target, 'cluster'],
    )
    test_with_cluster_df = pd.DataFrame(
        test_with_cluster,
        index=test_index,
        columns=preds + ['cluster'],
    )
    return train_with_cluster_df, test_with_cluster_df