Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _kprototypes(k, n_init, n_jobs, seed):
    """Fit a Huang-initialised KPrototypes model on a slice of ``data``.

    Only the first ``N_kproto`` rows of the module-level ``data`` array are
    used. Column indices ``M - MN`` through ``M - 1`` are passed as the
    categorical columns. The fitted model is discarded; nothing is returned.

    Args:
        k: Number of clusters.
        n_init: Forwarded to ``KPrototypes`` as ``n_init``.
        n_jobs: Forwarded to ``KPrototypes`` as ``n_jobs``.
        seed: Forwarded to ``KPrototypes`` as ``random_state``.
    """
    model = KPrototypes(
        n_clusters=k,
        init='Huang',
        n_init=n_init,
        n_jobs=n_jobs,
        random_state=seed,
    )
    subset = data[:N_kproto, :]
    categorical_columns = list(range(M - MN, M))
    model.fit(subset, categorical=categorical_columns)
def kproto(self):
    """Cluster ``self.data`` with k-prototypes and attach the labels.

    The number of clusters is whatever ``self.silouhette_analysis`` returns
    when called with ``prototype=True``. A ``KPrototypes`` model is fitted on
    the data, the same data is then passed to ``predict``, and the resulting
    labels are stored under the ``'labels'`` key.

    Returns:
        The data object with the added ``'labels'`` entry; the same object
        is also stored on ``self.data_clustered``.
    """
    # TODO- solve clustering issue with PCA + K-means
    # NOTE(review): this binds the same object as self.data (no copy is made
    # here), so the 'labels' assignment below presumably lands on self.data
    # as well -- confirm that is intended.
    frame = self.data
    categorical = self.categorical_features

    n_clusters = self.silouhette_analysis(frame, prototype=True)

    model = KPrototypes(n_clusters=n_clusters)
    model.fit(frame, categorical=categorical)
    frame['labels'] = model.predict(frame, categorical=categorical)

    self.data_clustered = frame
    return frame
def huang():
    """Run one verbose Huang-initialised KPrototypes fit/predict on ``data``.

    Uses the module-level ``K`` as the cluster count and column indices
    ``M - MN`` through ``M - 1`` as the categorical columns. The predicted
    labels are discarded; nothing is returned.
    """
    model = KPrototypes(n_clusters=K, init='Huang', n_init=1, verbose=2)
    categorical_columns = list(range(M - MN, M))
    model.fit_predict(data, categorical=categorical_columns)
#!/usr/bin/env python
import numpy as np
from kmodes.kprototypes import KPrototypes
# Stocks with their market caps, sectors and countries.
# The file is parsed twice: once as strings to extract the symbols in
# column 0, and once as objects so the remaining columns can hold mixed types.
symbol_table = np.genfromtxt('stocks.csv', dtype=str, delimiter=',')
syms = symbol_table[:, 0]

feature_table = np.genfromtxt('stocks.csv', dtype=object, delimiter=',')
X = feature_table[:, 1:]
# The first feature column is converted to float (presumably the market cap);
# columns 1 and 2 are handed to the model as categorical below.
X[:, 0] = X[:, 0].astype(float)

kproto = KPrototypes(n_clusters=4, init='Cao', verbose=2)
clusters = kproto.fit_predict(X, categorical=[1, 2])

# Cluster centroids of the trained model.
print(kproto.cluster_centroids_)

# Training statistics: final cost and number of iterations.
print(kproto.cost_)
print(kproto.n_iter_)

# One line per stock, pairing each symbol with its assigned cluster.
for symbol, cluster in zip(syms, clusters):
    print("Symbol: {}, cluster:{}".format(symbol, cluster))
def __init__(self, n_clusters=8, max_iter=100, num_dissim=euclidean_dissim,
             cat_dissim=matching_dissim, init='Huang', n_init=10, gamma=None,
             verbose=0, random_state=None, n_jobs=1):
    """Set up a KPrototypes estimator.

    ``n_clusters``, ``max_iter``, ``cat_dissim``, ``init``, ``verbose``,
    ``random_state`` and ``n_jobs`` are forwarded to the parent initialiser;
    ``num_dissim``, ``gamma`` and ``n_init`` are stored on the instance here.

    When ``init`` is given as a list and ``n_init`` is greater than 1,
    ``n_init`` is forced to 1 (with a printed notice if ``verbose`` is
    truthy).
    """
    super(KPrototypes, self).__init__(
        n_clusters, max_iter, cat_dissim, init,
        verbose=verbose, random_state=random_state, n_jobs=n_jobs)

    self.num_dissim = num_dissim
    self.gamma = gamma
    self.n_init = n_init

    # Only a list-valued init combined with several requested runs needs
    # adjusting; every other configuration is left as stored above.
    explicit_init = isinstance(self.init, list)
    if not (explicit_init and self.n_init > 1):
        return

    if self.verbose:
        print("Initialization method is deterministic. "
              "Setting n_init to 1.")
    self.n_init = 1
def cao():
    """Run one verbose Cao-initialised KPrototypes fit/predict on ``data``.

    Uses the module-level ``K`` as the cluster count and column indices
    ``M - MN`` through ``M - 1`` as the categorical columns. The predicted
    labels are discarded; nothing is returned.
    """
    model = KPrototypes(n_clusters=K, init='Cao', verbose=2)
    categorical_columns = list(range(M - MN, M))
    model.fit_predict(data, categorical=categorical_columns)