How to use the kraken.lib.dataset.generate_input_transforms function in kraken

To help you get started, we’ve selected a few kraken examples based on popular ways this function is used in public projects.
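In short, generate_input_transforms builds the preprocessing pipeline (a torchvision-style Compose) that turns a PIL image into a tensor matching a model's expected input geometry. A minimal sketch of the typical call pattern, assuming a hypothetical model file model.mlmodel and line image line.png:

from PIL import Image
from kraken.lib import vgsl, dataset

# load a trained VGSL model (placeholder path)
model = vgsl.TorchVGSLModel.load_model('model.mlmodel')
# the model's input block yields batch, channels, height, width
batch, channels, height, width = model.input
# the fifth argument is pad: blank pixels added left and right of a text line
transforms = dataset.generate_input_transforms(batch, height, width, channels, 16)
# the resulting pipeline maps a PIL image to a network-ready tensor
tensor = transforms(Image.open('line.png'))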


github mittagessen / kraken / kraken / ketos.py
    # preparse input sizes from vgsl string to seed ground truth data set
    # sizes and dimension ordering.
    if not nn:
        spec = spec.strip()
        if spec[0] != '[' or spec[-1] != ']':
            raise click.BadOptionUsage('spec', 'VGSL spec {} not bracketed'.format(spec))
        blocks = spec[1:-1].split(' ')
        m = re.match(r'(\d+),(\d+),(\d+),(\d+)', blocks[0])
        if not m:
            raise click.BadOptionUsage('spec', 'Invalid input spec {}'.format(blocks[0]))
        batch, height, width, channels = [int(x) for x in m.groups()]
    else:
        batch, channels, height, width = nn.input
    try:
        transforms = generate_input_transforms(batch, height, width, channels, pad)
    except KrakenInputException as e:
        raise click.BadOptionUsage('spec', str(e))

    # disable automatic partition when given evaluation set explicitly
    if evaluation_files:
        partition = 1
    ground_truth = list(ground_truth)

    # merge training_files into ground_truth list
    if training_files:
        ground_truth.extend(training_files)

    if len(ground_truth) == 0:
        raise click.UsageError('No training data was provided to the train command. Use `-t` or the `ground_truth` argument.')

    np.random.shuffle(ground_truth)
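The regex above pulls the four input dimensions out of the first block of a VGSL spec; a 0 in a dimension denotes a variable size. A toy reproduction with a hypothetical spec string:

import re

spec = '[1,48,0,1 Cr3,3,32 Mp2,2]'  # hypothetical VGSL spec
blocks = spec[1:-1].split(' ')
m = re.match(r'(\d+),(\d+),(\d+),(\d+)', blocks[0])
batch, height, width, channels = [int(x) for x in m.groups()]
print(batch, height, width, channels)  # 1 48 0 1 (width 0 = variable width)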
github mittagessen / kraken / kraken / ketos.py
    raise click.UsageError('No evaluation data was provided to the test command. Use `-e` or the `test_set` argument.')

    def _get_text(im):
        with open(os.path.splitext(im)[0] + '.gt.txt', 'r') as fp:
            return get_display(fp.read())

    acc_list = []
    for p, net in nn.items():
        algn_gt: List[str] = []
        algn_pred: List[str] = []
        chars = 0
        error = 0
        message('Evaluating {}'.format(p))
        logger.info('Evaluating {}'.format(p))
        batch, channels, height, width = net.nn.input
        ts = generate_input_transforms(batch, height, width, channels, pad)
        with log.progressbar(test_set, label='Evaluating') as bar:
            for im_path in bar:
                i = ts(Image.open(im_path))
                text = _get_text(im_path)
                pred = net.predict_string(i)
                chars += len(text)
                c, algn1, algn2 = global_align(text, pred)
                algn_gt.extend(algn1)
                algn_pred.extend(algn2)
                error += c
        acc_list.append((chars-error)/chars)
        confusions, scripts, ins, dels, subs = compute_confusions(algn_gt, algn_pred)
        rep = render_report(p, chars, error, confusions, scripts, ins, dels, subs)
        logger.info(rep)
        message(rep)
    logger.info('Average accuracy: {:0.2f}%, (stddev: {:0.2f})'.format(np.mean(acc_list) * 100, np.std(acc_list) * 100))
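Per-model character accuracy here is the plain ratio (chars - error) / chars, where error is the edit count accumulated from the global alignment. For instance:

chars, error = 1000, 25             # totals accumulated over the test set
accuracy = (chars - error) / chars  # 0.975, reported as 97.50%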
github mittagessen / kraken / kraken / rpred.py
        KrakenInputException if the mapping between segmentation scripts and
        networks is incomplete.
    """
    im_str = get_im_str(im)
    logger.info('Running {} multi-script recognizers on {} with {} lines'.format(len(nets), im_str, len(bounds['boxes'])))

    miss = [x[0] for x in bounds['boxes'] if not nets.get(x[0])]
    if miss:
        raise KrakenInputException('Missing models for scripts {}'.format(miss))

    # build dictionary for line preprocessing
    ts = {}
    for script, network in nets.items():
        logger.debug('Loading line transforms for {}'.format(script))
        batch, channels, height, width = network.nn.input
        ts[script] = generate_input_transforms(batch, height, width, channels, pad)

    for line in bounds['boxes']:
        rec = ocr_record('', [], [])
        for script, (box, coords) in zip(map(lambda x: x[0], line),
                                         extract_boxes(im, {'text_direction': bounds['text_direction'],
                                                            'boxes': map(lambda x: x[1], line)})):
            # skip if script is set to ignore
            if script_ignore is not None and script in script_ignore:
                logger.info('Ignoring {} line segment.'.format(script))
                continue
            # check if boxes are non-zero in any dimension
            if sum(coords[::2]) == 0 or coords[3] - coords[1] == 0:
                logger.warning('Run with zero dimension. Skipping.')
                continue
            # try conversion into tensor
            try:
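Because each recognizer may expect a different input geometry, the transforms are keyed by script. A condensed sketch of the same pattern outside the function, with hypothetical model paths:

from kraken.lib import models
from kraken.lib.dataset import generate_input_transforms

# hypothetical script -> recognizer mapping
nets = {'Latin': models.load_any('latin.mlmodel'),
        'Arabic': models.load_any('arabic.mlmodel')}
ts = {}
for script, network in nets.items():
    batch, channels, height, width = network.nn.input
    ts[script] = generate_input_transforms(batch, height, width, channels, 16)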
github mittagessen / kraken / kraken / rpred.py
        'horizontal-lr/rl/vertical-lr/rl'.
        pad (int): Extra blank padding to the left and right of text line.
                   Auto-disabled when expected network inputs are incompatible
                   with padding.
        bidi_reordering (bool): Reorder classes in the ocr_record according to
                                the Unicode bidirectional algorithm for correct
                                display.
    Yields:
        An ocr_record containing the recognized text, absolute character
        positions, and confidence values for each character.
    """
    im_str = get_im_str(im)
    logger.info('Running recognizer on {} with {} lines'.format(im_str, len(bounds['boxes'])))
    logger.debug('Loading line transform')
    batch, channels, height, width = network.nn.input
    ts = generate_input_transforms(batch, height, width, channels, pad)

    for box, coords in extract_boxes(im, bounds):
        # check if boxes are non-zero in any dimension
        if sum(coords[::2]) == 0 or coords[3] - coords[1] == 0:
            logger.warning('bbox {} with zero dimension. Emitting empty record.'.format(coords))
            yield ocr_record('', [], [])
            continue
        # try conversion into tensor
        try:
            line = ts(box)
        except Exception:
            yield ocr_record('', [], [])
            continue
        # check if line is non-zero
        if line.max() == line.min():
            yield ocr_record('', [], [])
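rpred is a generator of ocr_record objects, so calling code simply iterates over it. A hedged usage sketch (the model and image paths are placeholders, and pageseg.segment expects a binarized page):

from PIL import Image
from kraken import pageseg, rpred
from kraken.lib import models

im = Image.open('page_bw.png')            # placeholder: binarized page image
model = models.load_any('model.mlmodel')  # placeholder model path
bounds = pageseg.segment(im)              # {'text_direction': ..., 'boxes': [...]}
for record in rpred.rpred(model, im, bounds):
    print(record.prediction)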
github mittagessen / kraken / kraken / blla.py
    model = vgsl.TorchVGSLModel.load_model(model)
    model.eval()
    if mask:
        if mask.mode != '1' and not is_bitonal(mask):
            logger.error('Mask is not bitonal')
            raise KrakenInputException('Mask is not bitonal')
        mask = mask.convert('1')
        if mask.size != im.size:
            logger.error('Mask size {} doesn\'t match image size {}'.format(mask.size, im.size))
            raise KrakenInputException('Mask size {} doesn\'t match image size {}'.format(mask.size, im.size))
        logger.info('Masking enabled in segmenter.')
        mask = pil2array(mask)

    batch, channels, height, width = model.input
    transforms = dataset.generate_input_transforms(batch, height, width, channels, 0, valid_norm=False)
    res_tf = tf.Compose(transforms.transforms[:2])
    scal_im = res_tf(im).convert('L')

    with torch.no_grad():
        logger.debug('Running network forward pass')
        o = model.nn(transforms(im).unsqueeze(0))
    logger.debug('Upsampling network output')
    o = F.interpolate(o, size=scal_im.size[::-1])
    o = o.squeeze().numpy()
    logger.debug('Vectorizing network output')
    baselines = vectorize_lines(o)
    logger.debug('Polygonizing lines')
    lines = list(zip(baselines, calculate_polygonal_environment(scal_im, baselines)))
    logger.debug('Scaling vectorized lines')
    scale = np.divide(im.size, o.shape[:0:-1])
    lines = scale_polygonal_lines(lines, scale)
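Note that the segmenter builds its transforms with pad 0 and valid_norm=False: padding and centerline normalization are line-level operations that would distort a whole-page input. Contrasted with a recognition setup (the dimensions below are illustrative, not taken from a real model):

from kraken.lib.dataset import generate_input_transforms

# recognition: fixed-height grayscale lines, padding and normalization allowed
rec_ts = generate_input_transforms(1, 48, 0, 1, 16)
# segmentation: whole pages, no padding, normalization disabled
seg_ts = generate_input_transforms(1, 1200, 0, 3, 0, valid_norm=False)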
github mittagessen / kraken / kraken / contrib / heatmap_overlay.py
"""
Produces semi-transparent neural segmenter output overlays
"""

import sys
import torch
from PIL import Image
from kraken.lib import vgsl, dataset
import torch.nn.functional as F
from os.path import splitext

model = vgsl.TorchVGSLModel.load_model(sys.argv[1])
model.eval()
batch, channels, height, width = model.input

transforms = dataset.generate_input_transforms(batch, height, width, channels, 0, valid_norm=False)

imgs = sys.argv[2:]
torch.set_num_threads(1)

for img in imgs:
    print(img)
    im = Image.open(img)
    with torch.no_grad():
        o = model.nn(transforms(im).unsqueeze(0))
        o = F.interpolate(o, size=im.size[::-1])
        o = o.squeeze().numpy()
    heat = Image.fromarray((o[1]*255).astype('uint8'))
    heat.save(splitext(img)[0] + '.heat.png')
    overlay = Image.new('RGBA', im.size, (0, 130, 200, 255))
    Image.composite(overlay, im.convert('RGBA'), heat).save(splitext(img)[0] + '.overlay.png')
    del o
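The script is invoked as python heatmap_overlay.py model.mlmodel page1.png page2.png ... (placeholder filenames); for every input image it writes a .heat.png probability map and a .overlay.png composite next to the original file.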