How to use hnswlib - 6 common examples

To help you get started, we’ve selected a few hnswlib examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github semi-technologies / weaviate / adapters / repos / db / vector / hnsw / test_recall_hnswlib.py View on Github external
# Load the vectors to index, the query vectors and the ground-truth answers.
with open("recall_vectors.json") as vectors_file:
    data = json.load(vectors_file)

with open("recall_queries.json") as queries_file:
    queries = json.load(queries_file)

with open("recall_truths.json") as truths_file:
    truths = json.load(truths_file)

num_elements = len(data)
dim = len(data[0])
data_labels = np.arange(num_elements)

# Create the index in cosine space (other possible spaces: l2, ip).
p = hnswlib.Index(space="cosine", dim=dim)

# The maximum number of elements has to be known when the index is initialized.
p.init_index(max_elements=num_elements, ef_construction=2000, M=100)

# Time the insertion; add_items can be called several times.
before = time.time()
p.add_items(data, data_labels)
print("import took {}".format(time.time() - before))

# ef controls the recall at query time and should always be > k.
p.set_ef(100)

# Look up the k closest elements for every query (returns 2 numpy arrays).
results, distances = p.knn_query(queries, k=1)

relevant = 0
github theislab / scvelo / scvelo / preprocessing / neighbors.py View on Github external
def fit(self, X, metric="l2", M=16, ef=100, ef_construction=100, random_state=0):
        try:
            import hnswlib
        except ImportError:
            print(
                "In order to use fast approx neighbor search, "
                "you need to `pip install hnswlib`\n"
            )

        ef_c, ef = max(ef_construction, self.n_neighbors), max(self.n_neighbors, ef)
        metric = "l2" if metric == "euclidean" else metric

        X = X.A if issparse(X) else X
        ns, dim = X.shape

        knn = hnswlib.Index(space=metric, dim=dim)
        knn.init_index(
            max_elements=ns, ef_construction=ef_c, M=M, random_seed=random_state
        )
        knn.add_items(X)
        knn.set_ef(ef)

        knn_indices, knn_distances = knn.knn_query(
            X, k=self.n_neighbors, num_threads=self.num_threads
        )

        n_neighbors = self.n_neighbors

        if metric == "l2":
            knn_distances = np.sqrt(knn_distances)

        self.distances, self.connectivities = compute_connectivities_umap(
github nmslib / hnswlib / examples / example.py View on Github external
print("Adding first batch of %d elements" % (len(data1)))
p.add_items(data1)

# Query the elements for themselves and measure recall:
labels, distances = p.knn_query(data1, k=1)
print("Recall for the first batch:", np.mean(labels.reshape(-1) == np.arange(len(data1))), "\n")

# Serializing and deleting the index.
# The path lives in a single variable so that the save, the log messages and
# the reload below always refer to the same file.
index_path = 'first_half.bin'
print("Saving index to '%s'" % index_path)
p.save_index(index_path)
del p

# Re-initializing and loading the index
p = hnswlib.Index(space='l2', dim=dim)  # the space can be changed - keeps the data, alters the distance function.

print("\nLoading index from '%s'\n" % index_path)

# Increase the total capacity (max_elements), so that it will handle the new data
p.load_index(index_path, max_elements=num_elements)

print("Adding the second batch of %d elements" % (len(data2)))
p.add_items(data2)

# Query the elements for themselves and measure recall:
labels, distances = p.knn_query(data, k=1)
print("Recall for two batches:", np.mean(labels.reshape(-1) == np.arange(len(data))), "\n")
github CLUEbenchmark / PyCLUE / pyclue / tf1 / tasks / text_matching / siamese / predict.py View on Github external
def _load_cache(self):
        """Load the cached embeddings and index them with hnswlib.

        Reads ``ef`` from ``self.model_config``, loads texts, embeddings and
        labels from ``self.cache_file`` and builds a cosine-space index over
        the embeddings, stored as ``self.index_nms``.
        """
        self.ef = self.model_config.get('ef')
        cached = self.get_embedding_from_file(self.cache_file)
        self.cache_texts, self.cache_embeddings, self.cache_labels = cached
        self.num_cache, self.embedding_dim = self.cache_embeddings.shape

        # Declare the hnswlib index; `index` is just a short alias for it.
        index = self.index_nms = hnswlib.Index(space='cosine', dim=self.embedding_dim)
        # The maximum number of elements should be known beforehand.
        index.init_index(max_elements=self.num_cache, ef_construction=100, M=100)
        # Insert every cached embedding, using its position as the id
        # (add_items can be called several times).
        index.add_items(data=self.cache_embeddings, ids=range(self.num_cache))
        # ef controls the recall and should always be > k (knn).
        index.set_ef(ef=self.ef)
github CLUEbenchmark / PyCLUE / pyclue / tf1 / tasks / text_matching / siamese / train.py View on Github external
def load_cache(self, cache_file=None):
        """Index the cached embeddings with hnswlib and export the cache.

        Args:
            cache_file: Optional path of the cache file. When given, it
                replaces ``self.cache_file``; otherwise the path already
                stored in ``self.cache_file`` is used.

        When no cache file is configured, nothing happens. Otherwise the
        texts, embeddings and labels are loaded, a cosine-space index is
        built as ``self.index_nms`` and saved to ``cache.index`` inside
        ``self.pb_model_file``, and the cache file is copied next to it as
        ``cache.txt``.
        """
        if cache_file:
            self.cache_file = cache_file
        if self.cache_file:
            # Read from self.cache_file rather than the argument: the argument
            # is None when the caller relies on the previously stored path.
            self.cache_texts, self.cache_embeddings, self.cache_labels = self.get_embedding_from_file(self.cache_file)
            self.num_cache, self.embedding_dim = self.cache_embeddings.shape

            # application of hnswlib
            # declaring index
            self.index_nms = hnswlib.Index(space='cosine', dim=self.embedding_dim)
            # initializing index - the maximum number of elements should be known beforehand
            self.index_nms.init_index(max_elements=self.num_cache, ef_construction=100, M=100)
            # element insertion (can be called several times)
            self.index_nms.add_items(data=self.cache_embeddings, ids=range(self.num_cache))
            self.index_nms.save_index(os.path.join(self.pb_model_file, 'cache.index'))
            # controlling the recall by setting ef:
            self.index_nms.set_ef(ef=self.ef)  # ef should always be > k (knn)
            # copy the cache file into pb_model_file as cache.txt
            shutil.copy(self.cache_file, os.path.join(self.pb_model_file, 'cache.txt'))
github ThomasDelteil / VisualSearch_MXNet / mms / visualservice.py View on Github external
os.makedirs(data_dir)
        index_url = os.environ.get('INDEX_URL', INDEX_URL)
        idx_to_ASIN_url = os.environ.get('IDX_ASIN_URL', IDX_ASIN_URL)
        ASIN_to_data_url = os.environ.get('ASIN_DATA_URL', ASIN_DATA_URL)

        mx.test_utils.download(index_url, dirname=data_dir)
        mx.test_utils.download(idx_to_ASIN_url, dirname=data_dir)
        mx.test_utils.download(ASIN_to_data_url, dirname=data_dir)
        ############################################
        
        ############################################
        logging.info('Loading Resources files')
        
        self.idx_ASIN = pickle.load(open(os.path.join(data_dir, 'idx_ASIN.pkl'), 'rb'))
        self.ASIN_data = pickle.load(open(os.path.join(data_dir,'ASIN_data.pkl'), 'rb'))        
        self.p = hnswlib.Index(space = 'l2', dim = EMBEDDING_SIZE)
        self.p.load_index(os.path.join(data_dir,'index.idx'))
        ############################################
        
        logging.info('Resources files loaded')
        
        
        self.p.set_ef(EF)        
        self.k = K

hnswlib

hnswlib

Apache-2.0
Latest version published 12 months ago

Package Health Score

58 / 100
Full package analysis

Popular hnswlib functions