Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
@patch("pynndescent.NNDescent", wraps=pynndescent.NNDescent)
def test_random_state_being_passed_through(self, nndescent):
    """Check that ``random_state`` is forwarded to ``pynndescent.NNDescent``.

    ``nndescent`` is the patched constructor injected by ``@patch``; it wraps
    the real class, so the call below still goes through to it.
    """
    seed = 1
    index = nearest_neighbors.NNDescent("euclidean", random_state=seed)
    index.build(self.x1, k=30)

    # Exactly one index must have been constructed, and it must have been
    # given the seed configured on the wrapper above.
    nndescent.assert_called_once()
    check_mock_called_with_kwargs(nndescent, {"random_state": seed})
# NOTE(review): this is an excerpt. The enclosing function header and the `if`
# that pairs with the `else:` further down are not part of the visible text.
# Number of neighbours requested from the index for every sample.
k_neighbors = 40
# Resolve the metric by indexing `clusteringFunction` with the current value of
# `metric`; on any failure fall back to 'euclidean'.
# NOTE(review): a bare `except:` also swallows KeyboardInterrupt/SystemExit.
try:
metric = clusteringFunction[metric]
except:
metric = 'euclidean'
# Same lookup pattern for `clusterExpression`; on any failure it becomes False.
try:
clusterExpression = clusteringFunction[clusterExpression]
except:
clusterExpression = False
# Search over the transposed expression values when `clusterExpression` is
# truthy, otherwise over the transposed `X_pca`.
data = df_expr.values.T if clusterExpression else X_pca.T
print('Searching for %s nearest neighbors'%(k_neighbors))
# Build the index on `data` and query it with the same `data`.
# `knn[0]` is used below as column indices and `knn[1]` as the stored values
# (presumably neighbour indices and distances -- confirm against pynndescent).
knn = pynndescent.NNDescent(data, metric=metric, n_neighbors=k_neighbors).query(data, k=k_neighbors)
print('k(=%s) nearest neighbors found. Constructing a NetworkX graph'%(k_neighbors))
# Dense square matrix with one row and one column per queried sample.
A = np.zeros((len(knn[0]),len(knn[0])))
# Row i receives the values knn[1][i] at the columns listed in knn[0][i];
# every other entry stays 0.
for i in range(len(knn[0])):
A[i, knn[0][i]] = knn[1][i]
G = nx.from_numpy_array(A)
print('Clustering the graph')
# Partition the graph, then order the result by its index before taking the
# raw values (presumably a node -> community mapping -- confirm).
cellClusterIndex = pd.Series(community.best_partition(G)).sort_index().values
# Alternative branch (its `if` is outside this excerpt): call
# `clusteringFunction` with `n_clusters`, fit it on `X_pca.T` and take `labels_`.
else:
cellClusterIndex = clusteringFunction(n_clusters=self.nClusters).fit(X_pca.T).labels_
return cellClusterIndex
# NOTE(review): this is an excerpt. The enclosing method header, the definition
# of `k_neighbors`, and the `if` that pairs with the `else:` further down are
# not part of the visible text.
# Resolve the metric by indexing `self.clusteringFunction` with the current
# value of `metric`; on failure print the exception and use 'euclidean'.
try:
metric = self.clusteringFunction[metric]
except Exception as exception:
print(exception)
metric = 'euclidean'
# Same lookup pattern for `clusterExpression`; on failure it becomes False.
try:
clusterExpression = self.clusteringFunction[clusterExpression]
except Exception as exception:
print(exception)
clusterExpression = False
# Search over the transposed expression values when `clusterExpression` is
# truthy, otherwise over the transposed values of `df_xpca`.
data = self._df_expr.values.T if clusterExpression else df_xpca.values.T
print('Searching for %s nearest neighbors' % (k_neighbors), flush=True)
# Build the index on `data` and query it with the same `data`.
# `knn[0]` is used below as column indices and `knn[1]` as the stored values
# (presumably neighbour indices and distances -- confirm against pynndescent).
knn = pynndescent.NNDescent(data, metric=metric, n_neighbors=k_neighbors).query(data, k=k_neighbors)
print('k(=%s) nearest neighbors found. Constructing a NetworkX graph' % (k_neighbors), flush=True)
# Dense square matrix with one row and one column per queried sample.
A = np.zeros((len(knn[0]),len(knn[0])))
# Row i receives the values knn[1][i] at the columns listed in knn[0][i];
# every other entry stays 0.
for i in range(len(knn[0])):
A[i, knn[0][i]] = knn[1][i]
G = nx.from_numpy_array(A)
print('Clustering the graph', flush=True)
# Partition the graph, then order the result by its index before taking the
# raw values (presumably a node -> community mapping -- confirm).
cellClusterIndex = pd.Series(community.best_partition(G)).sort_index().values
# Alternative branch (its `if` is outside this excerpt): call
# `self.clusteringFunction` with `n_clusters`, fit it on the transposed
# `df_xpca` values, and convert `labels_` to float and then to str.
else:
data = df_xpca.values
cellClusterIndex = self.clusteringFunction(n_clusters=self.nClusters).fit(data.T).labels_.astype(float).astype(str)
# Print the distinct labels together with how often each one occurs.
print(np.unique(cellClusterIndex, return_counts=True))
def build(self, data, k):
    """Build a pynndescent index over ``data`` and query it with ``data``.

    The constructed index is stored on ``self.index``.

    Parameters
    ----------
    data
        Object exposing ``shape``; ``shape[0]`` is used as the sample count
        when sizing the index.
    k : int
        Number of neighbours to return per sample.

    Returns
    -------
    tuple
        ``(indices, distances)`` from the query, each with its first column
        removed.
    """
    sample_count = data.shape[0]

    # These values were taken from UMAP, which we assume to be sensible defaults
    tree_count = 5 + int(round(sample_count ** 0.5 / 20))
    iteration_count = max(5, int(round(np.log2(sample_count))))

    # Numba takes a while to load up, so there's little point in loading it
    # unless we're actually going to use it
    import pynndescent

    # UMAP uses the "alternative" algorithm, but that sometimes causes
    # memory corruption, so use the standard one, which seems to work fine
    index_options = dict(
        n_neighbors=15,
        metric=self.metric,
        metric_kwds=self.metric_params,
        random_state=self.random_state,
        n_trees=tree_count,
        n_iters=iteration_count,
        algorithm="standard",
        max_candidates=60,
        n_jobs=self.n_jobs,
    )
    self.index = pynndescent.NNDescent(data, **index_options)

    # One extra neighbour is requested and the first column is dropped
    # (presumably each point's match with itself -- confirm).
    query_result = self.index.query(data, k=k + 1, queue_size=1)
    indices, distances = query_result
    return indices[:, 1:], distances[:, 1:]