Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def cluster_data(self, row_distance="euclidean", row_linkage="single", axis="row", column_distance="euclidean", column_linkage="ward"):
    """Cluster the rows of ``self.data`` and, optionally, its columns.

    The row linkage is stored in ``self.clustering``. When ``row_linkage`` is
    listed in RAW_LINKAGES the raw data is handed to ``fastcluster.linkage``
    together with the metric; otherwise a distance vector is computed first
    (kept in ``self.distance_vector``) and the linkage is built from it.

    @row_distance/column_distance - distance measure name, see the DISTANCES
        variable
    @row_linkage/column_linkage - linkage method name, see the LINKAGES
        variable
    @axis - "row" clusters rows only; "both" additionally clusters columns
        when the data has more than two columns

    Side effects: sets ``self.clustering_axis``, ``self.clustering`` and
    ``self.column_clustering``; may replace ``self.data`` (see below) and
    prints progress messages to stdout.

    Raises Exception when the data type is numeric, binary or nominal, the
    row linkage is not in RAW_LINKAGES, and ``row_distance`` is not listed in
    ``DISTANCES[self.datatype]``.
    """
    print("Clustering rows:", row_distance, row_linkage)
    self.clustering_axis = axis
    row_linkage = str(row_linkage)

    if row_linkage in RAW_LINKAGES:
        # These linkage methods get the observations themselves plus a metric.
        self.clustering = fastcluster.linkage(self.data, method=row_linkage, metric=row_distance)
    else:
        # Validate the distance measure BEFORE computing pairwise distances so
        # an unsupported choice fails fast with the descriptive message rather
        # than after (or inside) the expensive pdist call.
        if self.datatype == "numeric" and row_distance not in DISTANCES[self.datatype]:
            raise Exception(
                "When clustering numeric data you must choose from these distance measures: "
                + ", ".join(DISTANCES[self.datatype])
            )
        if self.datatype in ("binary", "nominal") and row_distance not in DISTANCES[self.datatype]:
            raise Exception(
                "When clustering binary or nominal data you must choose from these distance measures: "
                + ", ".join(DISTANCES[self.datatype])
            )

        self.distance_vector = fastcluster.pdist(self.data, row_distance)
        self.clustering = fastcluster.linkage(self.distance_vector, method=row_linkage)

    # NOTE(review): presumably missing values were substituted before
    # clustering and are put back here - confirm against
    # __return_missing_values__.
    if self.missing_value is not False:
        self.data = self.__return_missing_values__(self.data, self.missing_values_indexes)

    self.column_clustering = []
    if axis == "both" and len(self.data[0]) > 2:
        print("Clustering columns:", column_distance, column_linkage)
        self.__cluster_columns__(column_distance, column_linkage)