How to use lmdb - 10 common examples

To help you get started, we've selected ten lmdb examples, chosen from popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tskit-dev / tsinfer / tests / test_formats.py View on Github external
def test_too_small_max_file_size_init(self):
        """Creating a SampleData store with a max_file_size too small to
        hold even an empty file must raise lmdb.MapFullError immediately."""
        with tempfile.TemporaryDirectory(prefix="tsinf_format_test") as tempdir:
            sample_path = os.path.join(tempdir, "samples.tmp")
            # A 1-byte cap cannot fit the initial file, so construction
            # itself fails rather than some later write.
            with self.assertRaises(lmdb.MapFullError):
                formats.SampleData(
                    path=sample_path,
                    sequence_length=1,
                    max_file_size=1,
                )
github NVIDIA / DIGITS / digits / dataset / images / generic / test_lmdb_creator.py View on Github external
def _write_to_lmdb(db, key, value):
    """
    Store (key, value) in db, growing the memory map as needed.

    LMDB environments have a fixed map_size; if the put overflows it,
    the map size is doubled and the write is retried until it succeeds.
    """
    while True:
        txn = db.begin(write=True)
        try:
            txn.put(key, value)
            txn.commit()
            return
        except lmdb.MapFullError:
            txn.abort()
            # Grow the map (double it) and retry the same write.
            db.set_mapsize(db.info()['map_size'] * 2)
github nigroup / nideep / nideep / iow / test_read_lmdb.py View on Github external
])

        img_data_str = ['\x08\x03\x10\x04\x18\x02"\x18\x01\x04\x07\n\r\x10\x13\x16\x02\x05\x08\x0b\x0e\x11\x14\x17\x03\x06\t\x0c\x0f\x12\x15\x18(\x01',
                        '\x08\x03\x10\x02\x18\x01"\x06\x10\x16\x11\x17\x12\x18(\x00']

        # write fake data to lmdb
        self.path_lmdb_num_ord = os.path.join(self.dir_tmp, 'imgs_num_ord_lmdb')
        db = lmdb.open(self.path_lmdb_num_ord, map_size=int(1e12))
        with db.begin(write=True) as in_txn:

            for idx, data_str in enumerate(img_data_str):
                in_txn.put('{:0>10d}'.format(idx), data_str)
        db.close()

        self.path_lmdb_rand_ord = os.path.join(self.dir_tmp, 'imgs_rand_ord_lmdb')
        db = lmdb.open(self.path_lmdb_rand_ord, map_size=int(1e12))
        with db.begin(write=True) as in_txn:

            for data_str in img_data_str:
                in_txn.put('{:0>10d}'.format(np.random.randint(10, 1000)), data_str)
        db.close()

        self.path_lmdb_non_num = os.path.join(self.dir_tmp, 'imgs_non_num_lmdb')
        db = lmdb.open(self.path_lmdb_non_num, map_size=int(1e12))
        with db.begin(write=True) as in_txn:

            for data_str in img_data_str:
                in_txn.put('key' + data_str, data_str)
        db.close()

        assert_not_equal(self.path_lmdb_num_ord, self.path_lmdb_rand_ord)
        assert_not_equal(self.path_lmdb_num_ord, self.path_lmdb_non_num)
github nigroup / nideep / nideep / iow / test_read_lmdb.py View on Github external
def test_num_entries_does_not_exist(self):
    """num_entries on a non-existent LMDB path must raise lmdb.Error."""
    missing_path = os.path.join(self.dir_tmp, 'test_num_entries_does_not_exist_lmdb')
    # Sanity check: the path really is absent before we probe it.
    assert_false(os.path.exists(missing_path))
    assert_raises(lmdb.Error, r.num_entries, missing_path)
github tskit-dev / tsinfer / tests / test_formats.py View on Github external
def test_too_small_max_file_size_add(self):
        """Writing more data than max_file_size allows must raise
        lmdb.MapFullError during the add, not at creation time."""
        with tempfile.TemporaryDirectory(prefix="tsinf_format_test") as tempdir:
            # Big enough for the initial file to be created, but far too
            # small for the genotype data added below.
            base_size = 2 ** 16
            with self.assertRaises(lmdb.MapFullError):
                sample_path = os.path.join(tempdir, "samples.tmp")
                with formats.SampleData(
                    path=sample_path, sequence_length=1, max_file_size=base_size
                ) as small_sample_file:
                    # A single site with base_size genotypes overflows the map.
                    small_sample_file.add_site(
                        0,
                        alleles=["0", "1"],
                        genotypes=np.zeros(base_size, dtype=np.int8),
                    )
            # Work around https://github.com/tskit-dev/tsinfer/issues/201
            small_sample_file.data.store.close()
github clovaai / deep-text-recognition-benchmark / create_lmdb_dataset.py View on Github external
def createDataset(inputPath, gtFile, outputPath, checkValid=True):
    """
    Create LMDB dataset for training and evaluation.
    ARGS:
        inputPath  : input folder path where starts imagePath
        outputPath : LMDB output path
        gtFile     : list of image path and label
        checkValid : if true, check the validity of every image
    """
    os.makedirs(outputPath, exist_ok=True)
    env = lmdb.open(outputPath, map_size=1099511627776)
    cache = {}
    cnt = 1

    with open(gtFile, 'r', encoding='utf-8') as data:
        datalist = data.readlines()

    nSamples = len(datalist)
    for i in range(nSamples):
        imagePath, label = datalist[i].strip('\n').split('\t')
        imagePath = os.path.join(inputPath, imagePath)

        # # only use alphanumeric data
        # if re.search('[^a-zA-Z0-9]', label):
        #     continue

        if not os.path.exists(imagePath):
github dsindex / blog / make_lmdb.py View on Github external
parser = OptionParser()
    parser.add_option("--verbose", action="store_const", const=1, dest="verbose", help="verbose mode")
    parser.add_option("-d", "--db", dest="dbpath",help="db path", metavar="DB")
    (options, args) = parser.parse_args()

    if options.verbose == 1 : VERBOSE = 1

    db_path = options.dbpath
    if db_path == None :
        parser.print_help()
        sys.exit(1)

    startTime = time.time()

    # env == db coz max_dbs=0
    env = lmdb.Environment(db_path,map_size=24*(1023**3),subdir=False,readonly=False,create=False,max_dbs=0,lock=False)
    txn = lmdb.Transaction(env,db=None,write=True)
    
    linecount = 0
    while 1 :
        try : line = sys.stdin.readline()
        except KeyboardInterrupt : break
        if not line : break
        try : line = line.strip()
        except : continue
        if not line : continue
        linecount += 1
        if linecount % 1000 == 0 :
            sys.stderr.write("[linecount]" + "\t" + str(linecount) + "\n")

        key,value = line.split('\t',1)
        if not key or not value : continue
github dsindex / blog / search_lmdb.py View on Github external
parser = OptionParser()
    parser.add_option("--verbose", action="store_const", const=1, dest="verbose", help="verbose mode")
    parser.add_option("-d", "--db", dest="dbpath",help="db path", metavar="DB")
    (options, args) = parser.parse_args()

    if options.verbose == 1 : VERBOSE = 1

    db_path = options.dbpath
    if db_path == None :
        parser.print_help()
        sys.exit(1)


    # env == db coz max_dbs=0
    env = lmdb.Environment(db_path,map_size=24*(1023**3),subdir=False,readonly=True,create=False,max_dbs=0,lock=False)
    txn = lmdb.Transaction(env,db=None,write=False)

    startTime = time.time()
    
    linecount = 0
    while 1 :
        try : line = sys.stdin.readline()
        except KeyboardInterrupt : break
        if not line : break
        try : line = line.strip()
        except : continue
        if not line : continue
        linecount += 1
        if linecount % 1000 == 0 :
            sys.stderr.write("[linecount]" + "\t" + str(linecount) + "\n")

        key,value = line.split('\t',1)
github dsindex / blog / make_lmdb.py View on Github external
parser.add_option("--verbose", action="store_const", const=1, dest="verbose", help="verbose mode")
    parser.add_option("-d", "--db", dest="dbpath",help="db path", metavar="DB")
    (options, args) = parser.parse_args()

    if options.verbose == 1 : VERBOSE = 1

    db_path = options.dbpath
    if db_path == None :
        parser.print_help()
        sys.exit(1)

    startTime = time.time()

    # env == db coz max_dbs=0
    env = lmdb.Environment(db_path,map_size=24*(1023**3),subdir=False,readonly=False,create=False,max_dbs=0,lock=False)
    txn = lmdb.Transaction(env,db=None,write=True)
    
    linecount = 0
    while 1 :
        try : line = sys.stdin.readline()
        except KeyboardInterrupt : break
        if not line : break
        try : line = line.strip()
        except : continue
        if not line : continue
        linecount += 1
        if linecount % 1000 == 0 :
            sys.stderr.write("[linecount]" + "\t" + str(linecount) + "\n")

        key,value = line.split('\t',1)
        if not key or not value : continue
github NVIDIA / DIGITS / examples / text-classification / create_dataset.py View on Github external
def _write_batch_to_lmdb(db, batch):
    """
    Write a batch of (key, datum) pairs to db, growing the map on overflow.

    On lmdb.MapFullError the map_size is doubled and the whole batch is
    retried recursively.  Growing at runtime needs py-lmdb >= 0.87.
    """
    try:
        with db.begin(write=True) as lmdb_txn:
            for key, datum in batch:
                lmdb_txn.put(key, datum.SerializeToString())
    except lmdb.MapFullError:
        # Double the memory map, then retry the same batch.
        new_limit = db.info()['map_size'] * 2
        try:
            db.set_mapsize(new_limit)
        except AttributeError as e:
            # set_mapsize() appeared in py-lmdb 0.87; older versions
            # cannot resize a live environment at all.
            version = tuple(int(x) for x in lmdb.__version__.split('.'))
            if version < (0, 87):
                raise ImportError('py-lmdb is out of date (%s vs 0.87)' % lmdb.__version__)
            else:
                raise e
        _write_batch_to_lmdb(db, batch)