Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def fit(self, X, y=None, **kwargs):
    """Fit the k-modes model to X and store the results on the estimator.

    Parameters
    ----------
    X : array-like, shape=[n_samples, n_features]
        Data to cluster. It is passed through ``pandas_to_numpy`` before
        being handed to ``k_modes``.
    y : ignored
        Accepted but not used by this method.
    **kwargs : ignored
        Accepted but not used by this method.

    Returns
    -------
    self
        The estimator itself, with ``_enc_cluster_centroids``, ``_enc_map``,
        ``labels_``, ``cost_``, ``n_iter_`` and ``epoch_costs_`` set from
        the six values returned by ``k_modes``.
    """
    data = pandas_to_numpy(X)
    rng = check_random_state(self.random_state)

    # Run the clustering first, then spread its six results over the
    # estimator's attributes in the order k_modes returns them.
    outcome = k_modes(
        data,
        self.n_clusters,
        self.max_iter,
        self.cat_dissim,
        self.init,
        self.n_init,
        self.verbose,
        rng,
        self.n_jobs,
    )
    (
        self._enc_cluster_centroids,
        self._enc_map,
        self.labels_,
        self.cost_,
        self.n_iter_,
        self.epoch_costs_,
    ) = outcome

    return self
def fit(self, X, y=None, categorical=None):
"""Compute k-prototypes clustering.
Parameters
----------
X : array-like, shape=[n_samples, n_features]
categorical : Index of columns that contain categorical data
"""
if categorical is not None:
assert isinstance(categorical, (int, list, tuple)), "The 'categorical' \
argument needs to be an integer with the index of the categorical \
column in your data, or a list or tuple of several of them, \
but it is a {}.".format(type(categorical))
X = pandas_to_numpy(X)
random_state = check_random_state(self.random_state)
# If self.gamma is None, gamma will be automatically determined from
# the data. The function below returns its value.
self._enc_cluster_centroids, self._enc_map, self.labels_, self.cost_, \
self.n_iter_, self.epoch_costs_, self.gamma = k_prototypes(
X,
categorical,
self.n_clusters,
self.max_iter,
self.num_dissim,
self.cat_dissim,
self.gamma,
self.init,
self.n_init,
self.verbose,
categorical : Indices of columns that contain categorical data
Returns
-------
labels : array, shape [n_samples,]
Index of the cluster each sample belongs to.
"""
assert hasattr(self, '_enc_cluster_centroids'), "Model not yet fitted."
if categorical is not None:
assert isinstance(categorical, (int, list, tuple)), "The 'categorical' \
argument needs to be an integer with the index of the categorical \
column in your data, or a list or tuple of several of them, \
but it is a {}.".format(type(categorical))
X = pandas_to_numpy(X)
Xnum, Xcat = _split_num_cat(X, categorical)
Xnum, Xcat = check_array(Xnum), check_array(Xcat, dtype=None)
Xcat, _ = encode_features(Xcat, enc_map=self._enc_map)
return _labels_cost(Xnum, Xcat, self._enc_cluster_centroids,
self.num_dissim, self.cat_dissim, self.gamma)[0]
New data to predict.
Returns
-------
labels : array, shape [n_samples,]
Index of the cluster each sample belongs to.
"""
assert hasattr(self, '_enc_cluster_centroids'), "Model not yet fitted."
if self.verbose and self.cat_dissim == ng_dissim:
print("Ng's dissimilarity measure was used to train this model, "
"but now that it is predicting the model will fall back to "
"using simple matching dissimilarity.")
X = pandas_to_numpy(X)
X = check_array(X, dtype=None)
X, _ = encode_features(X, enc_map=self._enc_map)
return _labels_cost(X, self._enc_cluster_centroids, self.cat_dissim)[0]