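# The snippets below are excerpted from the ivis test suite and from its
# multiprocess nearest-neighbour code. They are fragments, so a minimal set of
# imports is sketched here; the exact module paths of the ivis internals
# (KnnTripletGenerator, KNN_Worker) and of the HDF5Matrix utility are
# assumptions and may differ between ivis / TensorFlow versions.
import numpy as np
import pytest
from multiprocessing import Queue, cpu_count
from sklearn import datasets
from tqdm import tqdm

from ivis import Ivis
# Assumed import locations for the helpers used below:
# from ivis.data.triplet_generators import KnnTripletGenerator
# from ivis.data.knn import KNN_Worker
# from tensorflow.keras.utils import HDF5Matrix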
def test_non_zero_indexed_semi_supervised_classification_classes():
    iris = datasets.load_iris()
    x = iris.data
    y = iris.target
    # Make labels non-zero indexed
    y = y + 1
    # Mark half of the points as unlabeled (-1)
    mask = np.random.choice(range(len(y)), size=len(y) // 2, replace=False)
    y[mask] = -1

    supervision_metric = 'sparse_categorical_crossentropy'
    ivis_iris = Ivis(k=15, batch_size=16, epochs=5,
                     supervision_metric=supervision_metric)

    # Labels that do not start at zero are invalid for
    # sparse_categorical_crossentropy, so fitting should raise.
    with pytest.raises(ValueError):
        embeddings = ivis_iris.fit_transform(x, y)

def test_non_consecutive_indexed_semi_supervised_classification_classes():
    iris = datasets.load_iris()
    x = iris.data
    y = iris.target
    # Make the label values non-consecutive
    y[y == max(y)] = max(y) + 1
    # Mark half of the points as unlabeled (-1)
    mask = np.random.choice(range(len(y)), size=len(y) // 2, replace=False)
    y[mask] = -1

    supervision_metric = 'sparse_categorical_crossentropy'
    ivis_iris = Ivis(k=15, batch_size=16, epochs=5,
                     supervision_metric=supervision_metric)

    # Non-consecutive class labels are invalid for
    # sparse_categorical_crossentropy, so fitting should raise.
    with pytest.raises(ValueError):
        embeddings = ivis_iris.fit_transform(x, y)

def test_correctly_indexed_semi_supervised_classification_classes():
    iris = datasets.load_iris()
    x = iris.data
    y = iris.target
    # Mark half of the points as unlabeled (-1)
    mask = np.random.choice(range(len(y)), size=len(y) // 2, replace=False)
    y[mask] = -1

    supervision_metric = 'sparse_categorical_crossentropy'
    ivis_iris = Ivis(k=15, batch_size=16, epochs=5,
                     supervision_metric=supervision_metric)

    # Zero-indexed, consecutive labels are valid, so this should fit cleanly.
    embeddings = ivis_iris.fit_transform(x, y)

def test_h5_file(h5_filepath):
    rows, dims = 258, 32
    create_random_dataset(h5_filepath, rows, dims)

    # Load data directly from the HDF5 file
    test_index = rows // 5
    X_train = HDF5Matrix(h5_filepath, 'data', start=0, end=test_index)
    y_train = HDF5Matrix(h5_filepath, 'labels', start=0, end=test_index)
    X_test = HDF5Matrix(h5_filepath, 'data', start=test_index, end=rows)
    y_test = HDF5Matrix(h5_filepath, 'labels', start=test_index, end=rows)

    # Train and transform with ivis
    ivis_iris = Ivis(epochs=5, k=15, batch_size=16)
    y_pred_iris = ivis_iris.fit_transform(X_train, shuffle_mode='batch')
    y_pred = ivis_iris.transform(X_test)

    assert y_pred.shape[0] == len(X_test)
    assert y_pred.shape[1] == ivis_iris.embedding_dims

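# The h5_filepath fixture and the create_random_dataset helper used by
# test_h5_file are not part of this excerpt. A minimal sketch of what they
# might look like, assuming h5py is available and that the file stores
# 'data' and 'labels' datasets (hypothetical reconstruction, not the actual
# ivis test utilities):
import h5py

@pytest.fixture
def h5_filepath(tmp_path):
    return str(tmp_path / 'test.h5')

def create_random_dataset(path, rows, dims):
    with h5py.File(path, 'w') as f:
        f.create_dataset('data', data=np.random.rand(rows, dims).astype('float32'))
        f.create_dataset('labels', data=np.random.randint(0, 3, size=rows))
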
def test_KnnTripletGenerator():
    neighbour_list = np.load('tests/data/test_knn_k4.npy')
    iris = datasets.load_iris()
    X = iris.data
    batch_size = 32

    data_generator = KnnTripletGenerator(X, neighbour_list,
                                         batch_size=batch_size)

    # Run generator through one iteration of the dataset and into the next
    for i in range((X.shape[0] // batch_size) + 1):
        batch = data_generator.__getitem__(i)
        # Check that everything is the expected shape
        assert isinstance(batch, tuple)
        assert len(batch) == 2
        assert len(batch[0]) == 3
        assert len(batch[1]) <= batch_size
        assert batch[0][0].shape[-1] == X.shape[-1]

# Fragment from ivis's multiprocess nearest-neighbour retrieval: the dataset is
# split into index ranges, one KNN_Worker process is started per chunk, and the
# results queue is drained continuously while the workers run. X,
# index_filepath, k, search_k and verbose are defined by the enclosing function.
n_dims = X.shape[1]
chunk_size = X.shape[0] // cpu_count()
remainder = (X.shape[0] % cpu_count()) > 0
process_pool = []
results_queue = Queue()

# Split up the indices and assign a worker process to each chunk
i = 0
while (i + chunk_size) <= X.shape[0]:
    process_pool.append(KNN_Worker(index_filepath, k, search_k, n_dims,
                                   (i, i + chunk_size), results_queue))
    i += chunk_size
if remainder:
    process_pool.append(KNN_Worker(index_filepath, k, search_k, n_dims,
                                   (i, X.shape[0]), results_queue))

try:
    for process in process_pool:
        process.start()

    # Read from the queue constantly to prevent it from becoming full
    with tqdm(total=X.shape[0], disable=verbose < 1) as pbar:
        neighbour_list = []
        neighbour_list_length = len(neighbour_list)
        while any(process.is_alive() for process in process_pool):
            while not results_queue.empty():
                neighbour_list.append(results_queue.get())
            progress = len(neighbour_list) - neighbour_list_length
            pbar.update(progress)
            neighbour_list_length = len(neighbour_list)

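# KNN_Worker itself is not shown in the fragment above. It follows the standard
# pattern of a multiprocessing.Process subclass that computes the neighbours
# for its assigned index range and pushes each result onto the shared queue.
# A generic, illustrative sketch of that pattern (not ivis's actual KNN_Worker,
# which queries an on-disk Annoy index for each row's k nearest neighbours):
from multiprocessing import Process

class RangeWorker(Process):
    """Hypothetical worker: processes rows in [start, end) and reports results."""

    def __init__(self, data_range, results_queue):
        super().__init__()
        self.data_range = data_range
        self.results_queue = results_queue

    def run(self):
        start, end = self.data_range
        for row_index in range(start, end):
            # Push one result per row as soon as it is ready, so the parent
            # process can drain the queue while workers are still alive.
            self.results_queue.put((row_index, []))
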
def test_supervised_model_saving(model_filepath):
    model = Ivis(k=15, batch_size=16, epochs=5,
                 supervision_metric='sparse_categorical_crossentropy')
    iris = datasets.load_iris()
    X = iris.data
    Y = iris.target

    model.fit(X, Y)
    model.save_model(model_filepath, overwrite=True)

    model_2 = Ivis()
    model_2.load_model(model_filepath)

    # Check that the embeddings produced by both models are the same
    assert np.all(model.transform(X) == model_2.transform(X))
    # Check that the supervised predictions are the same
    assert np.all(model.score_samples(X) == model_2.score_samples(X))
    # Check that the serializable state dicts are the same
    assert model.__getstate__() == model_2.__getstate__()

    # Check all encoder weights are the same
    for model_layer, model_2_layer in zip(model.encoder.layers,
                                          model_2.encoder.layers):
        model_layer_weights = model_layer.get_weights()
        model_2_layer_weights = model_2_layer.get_weights()
        for i in range(len(model_layer_weights)):
            assert np.all(model_layer_weights[i] == model_2_layer_weights[i])

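# The model_filepath fixture used by the saving tests is not included in this
# excerpt. A plausible sketch, assuming it simply hands the test a path under
# pytest's tmp_path (hypothetical; the real fixture may differ):
@pytest.fixture
def model_filepath(tmp_path):
    return str(tmp_path / 'test.ivis.model')
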
def test_ivis_model_saving(model_filepath):
    model = Ivis(k=15, batch_size=16, epochs=5)
    iris = datasets.load_iris()
    X = iris.data

    model.fit(X)
    model.save_model(model_filepath)

    model_2 = Ivis()
    model_2.load_model(model_filepath)

    # Check that the embeddings produced by both models are the same
    assert np.all(model.transform(X) == model_2.transform(X))
    # Check that the serializable state dicts are the same
    assert model.__getstate__() == model_2.__getstate__()

    # Check all encoder weights are the same
    for model_layer, model_2_layer in zip(model.encoder.layers,
                                          model_2.encoder.layers):
        model_layer_weights = model_layer.get_weights()
        model_2_layer_weights = model_2_layer.get_weights()
        for i in range(len(model_layer_weights)):
            assert np.all(model_layer_weights[i] == model_2_layer_weights[i])

    # Check optimizer weights are the same