How to use the kraken.rpred.ocr_record class in kraken

To help you get started, we've selected a few kraken examples based on popular ways kraken.rpred.ocr_record is used in public projects.

From mittagessen/kraken: tests/test_serialization.py (view on GitHub)
def setUp(self):
        with open(os.path.join(resources, 'records.json'), 'r') as fp:
            self.records = [rpred.ocr_record(**x) for x in json.load(fp)]
        self.validator = HocrValidator('standard')
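
The test above rebuilds ocr_record objects from keyword arguments stored in a JSON fixture before validating their hOCR serialization. If you only need the record container itself, the constructor takes the predicted text, the per-character cut boxes, and the per-character confidences, as the rpred excerpts further down show. A minimal sketch, assuming the positional (prediction, cuts, confidences) signature and the matching attribute names:

from kraken.rpred import ocr_record

# build a record by hand: text, per-character boxes (x0, y0, x1, y1), confidences
rec = ocr_record('foo',
                 [(10, 5, 20, 25), (20, 5, 30, 25), (30, 5, 40, 25)],
                 [0.99, 0.95, 0.97])

# attribute names follow the rpred excerpts on this page; treat them as an
# assumption on other kraken versions
print(rec.prediction)    # 'foo'
print(rec.cuts[1])       # (20, 5, 30, 25)
print(rec.confidences)   # [0.99, 0.95, 0.97]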
From mittagessen/kraken: kraken/rpred.py (view on GitHub)
display.
    Yields:
        An ocr_record containing the recognized text, absolute character
        positions, and confidence values for each character.
    """
    im_str = get_im_str(im)
    logger.info('Running recognizer on {} with {} lines'.format(im_str, len(bounds['boxes'])))
    logger.debug('Loading line transform')
    batch, channels, height, width = network.nn.input
    ts = generate_input_transforms(batch, height, width, channels, pad)

    for box, coords in extract_boxes(im, bounds):
        # check if boxes are non-zero in any dimension
        if sum(coords[::2]) == 0 or coords[3] - coords[1] == 0:
            logger.warning('bbox {} with zero dimension. Emitting empty record.'.format(coords))
            yield ocr_record('', [], [])
            continue
        # try conversion into tensor
        try:
            line = ts(box)
        except Exception:
            yield ocr_record('', [], [])
            continue
        # check if line is non-zero
        if line.max() == line.min():
            yield ocr_record('', [], [])
            continue

        preds = network.predict(line)
        # calculate recognized LSTM locations of characters
        # scale between network output and network input
        net_scale = line.shape[2]/network.outputs.shape[1]
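
This excerpt (continued in the next snippet) is the core of the single-model recognizer: each text line is cropped, normalised by the line transform, run through the network, and yielded as an ocr_record, with empty records emitted for degenerate or unconvertible lines. Consuming the generator could look like the sketch below; the rpred(network, im, bounds) call and the record attributes come from these excerpts, while the model loader, file names, and example bounds are placeholders and assumptions:

from PIL import Image
from kraken import rpred
from kraken.lib import models

# placeholder model and image; any recognition model and page image will do
model = models.load_any('en_best.mlmodel')
im = Image.open('page.png')

# a hand-written bounds dict with two line boxes (x0, y0, x1, y1)
bounds = {'text_direction': 'horizontal-lr',
          'boxes': [(100, 50, 900, 110), (100, 130, 900, 190)]}

for record in rpred.rpred(model, im, bounds):
    print(record.prediction)
    # each character comes with an absolute cut box and a confidence value
    for char, cut, confidence in zip(record.prediction, record.cuts, record.confidences):
        print(char, cut, confidence)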
From mittagessen/kraken: kraken/rpred.py (view on GitHub)
pred = ''.join(x[0] for x in preds)
        pos = []
        conf = []
        for _, start, end, c in preds:
            if bounds['text_direction'].startswith('horizontal'):
                xmin = coords[0] + _scale_val(start, 0, box.size[0])
                xmax = coords[0] + _scale_val(end, 0, box.size[0])
                pos.append((xmin, coords[1], xmax, coords[3]))
            else:
                ymin = coords[1] + _scale_val(start, 0, box.size[1])
                ymax = coords[1] + _scale_val(end, 0, box.size[1])
                pos.append((coords[0], ymin, coords[2], ymax))
            conf.append(c)
        if bidi_reordering:
            logger.debug('BiDi reordering record.')
            yield bidi_record(ocr_record(pred, pos, conf))
        else:
            logger.debug('Emitting raw record')
            yield ocr_record(pred, pos, conf)
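
The inner loop above maps the network's per-character output positions (start, end) back into absolute page coordinates via _scale_val, offsetting by the line's bounding box and clamping to its extent. The helper below is an illustrative stand-in, not kraken's implementation: it assumes the value is rescaled by the input/output ratio (net_scale), corrected for the padding added before recognition, and clamped to the valid range:

# illustrative stand-in for _scale_val; net_scale and pad mirror the values
# computed earlier in the excerpt (line width / network output width, padding)
def scale_val(val, min_val, max_val, net_scale=1.0, pad=16):
    scaled = int(round(val * net_scale - pad))
    return min(max(scaled, min_val), max_val - 1)

# a character spanning output positions 12..15 of a 400 px wide line whose
# crop starts at x0 = 100 in the page image
x0 = 100
xmin = x0 + scale_val(12, 0, 400, net_scale=8.0)
xmax = x0 + scale_val(15, 0, 400, net_scale=8.0)
print(xmin, xmax)    # absolute x extent of the character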
From mittagessen/kraken: kraken/rpred.py (view on GitHub)
bd.resolve_weak_types(storage)
    bd.resolve_neutral_types(storage, False)
    bd.resolve_implicit_levels(storage, False)
    for i, j in enumerate(record):
        storage['chars'][i]['record'] = j
    bd.reorder_resolved_levels(storage, False)
    bd.apply_mirroring(storage, False)
    prediction = ''
    cuts = []
    confidences = []
    for ch in storage['chars']:
        # code point may have been mirrored
        prediction = prediction + ch['ch']
        cuts.append(ch['record'][1])
        confidences.append(ch['record'][2])
    return ocr_record(prediction, cuts, confidences)
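
bidi_record, whose body is shown above, runs the Unicode bidirectional algorithm over a finished record so that right-to-left text comes out in display order while every character keeps its cut box and confidence. A hedged usage sketch with fabricated record contents:

from kraken.rpred import ocr_record, bidi_record

# a made-up Hebrew record in logical (reading) order; boxes and confidences are dummies
logical = ocr_record('שלום',
                     [(40, 0, 50, 20), (30, 0, 40, 20), (20, 0, 30, 20), (10, 0, 20, 20)],
                     [0.9, 0.9, 0.9, 0.9])

display = bidi_record(logical)
# the prediction is reordered (and possibly mirrored) for display; cuts and
# confidences stay aligned with their characters
print(display.prediction)
print(display.cuts)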
From mittagessen/kraken: kraken/rpred.py (view on GitHub)
        for script, (box, coords) in zip(map(lambda x: x[0], line),
                                         extract_boxes(im, {'text_direction': bounds['text_direction'],
                                                            'boxes': map(lambda x: x[1], line)})):
            # skip if script is set to ignore
            if script_ignore is not None and script in script_ignore:
                logger.info('Ignoring {} line segment.'.format(script))
                continue
            # check if boxes are non-zero in any dimension
            if sum(coords[::2]) == 0 or coords[3] - coords[1] == 0:
                logger.warning('Run with zero dimension. Skipping.')
                continue
            # try conversion into tensor
            try:
                logger.debug('Preparing run.')
                line = ts[script](box)
            except Exception:
                logger.warning('Conversion of line {} failed. Skipping.'.format(coords))
                yield ocr_record('', [], [])
                continue

            # check if line is non-zero
            if line.max() == line.min():
                logger.warning('Empty run. Skipping.')
                yield ocr_record('', [], [])
                continue

            logger.debug('Forward pass with model {}'.format(script))
            preds = nets[script].predict(line)

            # calculate recognized LSTM locations of characters
            logger.debug('Convert to absolute coordinates')
            scale = box.size[0]/(len(nets[script].outputs)-2 * pad)
            pred = ''.join(x[0] for x in preds)
            pos = []
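
This excerpt is from the multi-model path: every line is a list of (script, box) runs, each run is recognized with the model registered for its script, and runs whose script appears in script_ignore are skipped. A call could look like the sketch below; the entry point name mm_rpred, the exact bounds layout, and the model files are assumptions based on this excerpt rather than something shown on this page:

from PIL import Image
from kraken import rpred
from kraken.lib import models

im = Image.open('mixed_script_page.png')

# one recognizer per script tag produced by the segmenter (placeholder files)
nets = {'Latn': models.load_any('latin.mlmodel'),
        'Arab': models.load_any('arabic.mlmodel')}

# every line is a list of (script, box) runs, matching the loop in the excerpt;
# some kraken versions also expect a 'script_detection': True flag in bounds
bounds = {'text_direction': 'horizontal-lr',
          'boxes': [[('Latn', (100, 50, 400, 110)),
                     ('Arab', (420, 50, 900, 110))]]}

for record in rpred.mm_rpred(nets, im, bounds, script_ignore=['Arab']):
    print(record.prediction)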
From mittagessen/kraken: kraken/rpred.py (view on GitHub)
im_str = get_im_str(im)
    logger.info('Running {} multi-script recognizers on {} with {} lines'.format(len(nets), im_str, len(bounds['boxes'])))

    miss = [x[0] for x in bounds['boxes'] if not nets.get(x[0])]
    if miss:
        raise KrakenInputException('Missing models for scripts {}'.format(miss))

    # build dictionary for line preprocessing
    ts = {}
    for script, network in nets.items():
        logger.debug('Loading line transforms for {}'.format(script))
        batch, channels, height, width = network.nn.input
        ts[script] = generate_input_transforms(batch, height, width, channels, pad)

    for line in bounds['boxes']:
        rec = ocr_record('', [], [])
        for script, (box, coords) in zip(map(lambda x: x[0], line),
                                         extract_boxes(im, {'text_direction': bounds['text_direction'],
                                                            'boxes': map(lambda x: x[1], line)})):
            # skip if script is set to ignore
            if script_ignore is not None and script in script_ignore:
                logger.info('Ignoring {} line segment.'.format(script))
                continue
            # check if boxes are non-zero in any dimension
            if sum(coords[::2]) == 0 or coords[3] - coords[1] == 0:
                logger.warning('Run with zero dimension. Skipping.')
                continue
            # try conversion into tensor
            try:
                logger.debug('Preparing run.')
                line = ts[script](box)
            except Exception:
                logger.warning('Conversion of line {} failed. Skipping.'.format(coords))
                yield ocr_record('', [], [])
                continue
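
Putting the pieces together, a typical single-model pipeline binarizes and segments a page, hands the resulting bounds to rpred, and collects the yielded ocr_record objects. A minimal end-to-end sketch, assuming kraken's standard helpers (kraken.binarization.nlbin, kraken.pageseg.segment, kraken.lib.models.load_any) and placeholder file names:

from PIL import Image
from kraken import binarization, pageseg, rpred
from kraken.lib import models

model = models.load_any('en_best.mlmodel')   # placeholder model file

im = Image.open('page.png')
bw = binarization.nlbin(im)                  # binarize the page image
bounds = pageseg.segment(bw)                 # {'text_direction': ..., 'boxes': [...]}

records = list(rpred.rpred(model, bw, bounds))
for rec in records:
    # mean confidence per line, guarding against empty records
    mean_conf = sum(rec.confidences) / max(len(rec.confidences), 1)
    print('{:.3f}\t{}'.format(mean_conf, rec.prediction))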