How to use the nmslib.addDataPointBatch function in nmslib

To help you get started, we’ve selected a few nmslib examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github nmslib / nmslib / python_bindings / integration_tests / test_nmslib.py View on Github external
# NOTE(review): excerpt from the middle of a function. The enclosing `def`
# and the definitions of `fast`, `fast_batch`, `space_type`, `space_param`,
# `method_name`, `TimeIt`, `read_data_fast` and `read_data_fast_batch` are
# not visible in this snippet.
f = '/tmp/foo.txt'
    # Generate the input file only when it is missing: a 100000 x 1000 array
    # of random values written as tab-delimited text.
    if not os.path.isfile(f):
        print('creating %s' % f)
        np.savetxt(f, np.random.rand(100000,1000), delimiter="\t")
        print('done')

    # Variant 1: read the whole file with one call and add it in one batch.
    if fast:
        index = nmslib.init(
                             space_type,
                             space_param,
                             method_name,
                             nmslib.DataType.DENSE_VECTOR,
                             nmslib.DistType.FLOAT)
        # Reading the file and adding the points both happen inside the
        # `TimeIt` block.
        with TimeIt('fast add data point'):
            data = read_data_fast(f)
            # Second argument is an int32 array 0..len(data)-1
            # (presumably the point ids -- confirm against the nmslib API).
            nmslib.addDataPointBatch(index, np.arange(len(data), dtype=np.int32), data)
        nmslib.freeIndex(index)

    # Variant 2: add the data chunk by chunk into a fresh index.
    if fast_batch:
        index = nmslib.init(
                             space_type,
                             space_param,
                             method_name,
                             nmslib.DataType.DENSE_VECTOR,
                             nmslib.DistType.FLOAT)
        with TimeIt('fast_batch add data point'):
            # `offset` shifts each chunk's arange so the values continue where
            # the previous chunk ended instead of restarting at 0.
            offset = 0
            # NOTE(review): 10000 is presumably the chunk size in rows --
            # confirm against read_data_fast_batch.
            for data in read_data_fast_batch(f, 10000):
                nmslib.addDataPointBatch(index, np.arange(len(data), dtype=np.int32) + offset, data)
                offset += data.shape[0]
        # After the loop, `offset` is the sum of the chunks' row counts.
        print('offset', offset)
        nmslib.freeIndex(index)
github nmslib / nmslib / python_bindings / integration_tests / test_nmslib.py View on Github external
# NOTE(review): excerpt from the middle of a function. The enclosing `def`
# and the definitions of `batch`, `DATA_STRS` and `MAX_PRINT_QTY` are not
# visible, and the snippet is cut off after the last print.
QUERY_STRS = ["abc", "def", "ghik"]
    # NOTE(review): 'leven' is presumably the Levenshtein (edit-distance)
    # space -- confirm against the nmslib space list.
    space_type = 'leven'
    space_param = []
    method_name = 'small_world_rand'
    # `index_name` is not used within the visible part of this snippet.
    index_name  = method_name + '.index'

    # The index is created for string objects with an integer distance type.
    index = nmslib.init(
                             space_type,
                             space_param,
                             method_name,
                             nmslib.DataType.OBJECT_AS_STRING,
                             nmslib.DistType.INT)

    # Add the strings either in a single batch call or one point at a time.
    if batch:
        print('DATA_STRS', DATA_STRS)
        # The returned `positions` is not used within the visible snippet.
        positions = nmslib.addDataPointBatch(index, np.arange(len(DATA_STRS), dtype=np.int32), DATA_STRS)
    else:
        # The enumeration counter is passed as the second argument for each string.
        for id, data in enumerate(DATA_STRS):
            nmslib.addDataPoint(index, id, data)

    print('Let\'s print a few data entries')
    print('We have added %d data points' % nmslib.getDataPointQty(index))

    # Print getDistance for the pairs (0,0), (1,1), (0,1) and (1,0).
    print("Distance between points (0,0) " + str(nmslib.getDistance(index, 0, 0)));
    print("Distance between points (1,1) " + str(nmslib.getDistance(index, 1, 1)));
    print("Distance between points (0,1) " + str(nmslib.getDistance(index, 0, 1)));
    print("Distance between points (1,0) " + str(nmslib.getDistance(index, 1, 0)));

    # Print at most MAX_PRINT_QTY of the stored data points.
    for i in range(0,min(MAX_PRINT_QTY,nmslib.getDataPointQty(index))):
        print(nmslib.getDataPoint(index,i))

    print('Let\'s invoke the index-build process')
github nmslib / nmslib / python_bindings / integration_tests / sparse_bench.py View on Github external
# NOTE(review): excerpt from the middle of a function. The enclosing `def`
# and the definitions of `batch`, `dataset`, `queryset`, `data_matrix` and
# `TimeIt` are not visible, and the snippet is cut off after `index_param`.
#space_type = 'cosinesimil_sparse'
    space_type = 'cosinesimil_sparse_fast'
    space_param = []
    method_name = 'small_world_rand'
    index_name  = method_name + '_sparse.index'
    # Delete any existing file with the index name before continuing.
    if os.path.isfile(index_name):
        os.remove(index_name)
    index = nmslib.init(space_type,
                        space_param,
                        method_name,
                        nmslib.DataType.SPARSE_VECTOR,
                        nmslib.DistType.FLOAT)

    if batch:
        # NOTE(review): the arange is sized from `dataset` while the data
        # passed in is `data_matrix`; presumably both describe the same rows
        # -- confirm against the caller.
        with TimeIt('batch add'):
            positions = nmslib.addDataPointBatch(index, np.arange(len(dataset), dtype=np.int32), data_matrix)
        print('positions', positions)
    else:
        d = []
        q = []
        # Convert every row into a list of [position, value] pairs, keeping
        # only entries whose value is > 0.
        with TimeIt('preparing'):
            for data in dataset:
                d.append([[i, v] for i, v in enumerate(data) if v > 0])
            # `q` is built here but not consumed within the visible snippet.
            for data in queryset:
                q.append([[i, v] for i, v in enumerate(data) if v > 0])
        # Add the converted rows one at a time, passing the enumeration
        # counter as the second argument.
        with TimeIt('adding points'):
            for id, data in enumerate(d):
                nmslib.addDataPoint(index, id, data)

    print('Let\'s invoke the index-build process')

    # List of 'key=value' strings; its consumer lies beyond the visible
    # snippet.
    index_param = ['NN=17', 'efConstruction=50', 'indexThreadQty=4']
github nmslib / nmslib / python_bindings / integration_tests / test_nmslib.py View on Github external
# NOTE(review): excerpt from the middle of a function. The enclosing `def`
# and the definitions of `fast`, `space_type`, `space_param`, `read_data`
# and `read_data_fast` are not visible, and the snippet is cut off after the
# distance prints.
method_name = 'small_world_rand'
    index_name  = method_name + '.index'
    # Delete any existing file with the index name before continuing.
    if os.path.isfile(index_name):
        os.remove(index_name)
    index = nmslib.init(
                             space_type,
                             space_param,
                             method_name,
                             nmslib.DataType.DENSE_VECTOR,
                             nmslib.DistType.FLOAT)

    # Time reading 'sample_dataset.txt' plus adding its contents to the index.
    start = time.time()
    if fast:
        # Load the file with one call and add it in a single batch.
        data = read_data_fast('sample_dataset.txt')
        print('data.shape', data.shape)
        # The returned `positions` is not used within the visible snippet.
        positions = nmslib.addDataPointBatch(index, np.arange(len(data), dtype=np.int32), data)
    else:
        # Add items one by one; the value returned by addDataPoint must equal
        # the enumeration counter passed in, otherwise exit with status 1.
        for id, data in enumerate(read_data('sample_dataset.txt')):
            pos = nmslib.addDataPoint(index, id, data)
            if id != pos:
                print('id %s != pos %s' % (id, pos))
                sys.exit(1)
    end = time.time()
    print('added data in %s secs' % (end - start))

    print('Let\'s print a few data entries')
    print('We have added %d data points' % nmslib.getDataPointQty(index))

    # Print getDistance for the pairs (0,0), (1,1), (0,1) and (1,0).
    print("Distance between points (0,0) " + str(nmslib.getDistance(index, 0, 0)));
    print("Distance between points (1,1) " + str(nmslib.getDistance(index, 1, 1)));
    print("Distance between points (0,1) " + str(nmslib.getDistance(index, 0, 1)));
    print("Distance between points (1,0) " + str(nmslib.getDistance(index, 1, 0)));
github nmslib / nmslib / python_bindings / unit_tests.py View on Github external
def test_add_points_batch5(self):
        """Batch-add three strings and check the returned positions.

        Calls ``nmslib.addDataPointBatch`` on ``self.index`` with the int32
        array ``[0, 1, 2]`` and three string items, then asserts that the
        returned value equals ``[0, 1, 2]``.
        """
        batch_ids = np.array([0, 1, 2], dtype=np.int32)
        items = ["string1", "string2", "string3"]
        positions = nmslib.addDataPointBatch(self.index, batch_ids, items)
        # Compare against a freshly built array rather than `batch_ids`, so
        # the check does not depend on the call leaving its argument intact.
        expected = np.array([0, 1, 2], dtype=np.int32)
        nt.assert_array_equal(expected, positions)
github nmslib / nmslib / python_bindings / unit_tests.py View on Github external
def test_add_points_batch5(self):
        """Batch-add three 2-element float32 rows and check the positions.

        Calls ``nmslib.addDataPointBatch`` on ``self.index`` with the int32
        array ``[0, 1, 2]`` and a 3 x 2 float32 array, then asserts that the
        returned value equals ``[0, 1, 2]``.
        """
        batch_ids = np.array([0, 1, 2], dtype=np.int32)
        rows = np.array(
            [[0.34, 0.54], [0.55, 0.52], [0.21, 0.68]],
            dtype=np.float32,
        )
        positions = nmslib.addDataPointBatch(self.index, batch_ids, rows)
        # Compare against a freshly built array rather than `batch_ids`, so
        # the check does not depend on the call leaving its argument intact.
        expected = np.array([0, 1, 2], dtype=np.int32)
        nt.assert_array_equal(expected, positions)