How to use the kraken.lib.util.pil2array function in kraken

To help you get started, we’ve selected a few examples of kraken.lib.util.pil2array, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mittagessen / kraken / kraken / blla.py View on Github external
"""
    im_str = get_im_str(im)
    logger.info('Segmenting {}'.format(im_str))

    model = vgsl.TorchVGSLModel.load_model(model)
    model.eval()
    if mask:
        if mask.mode != '1' and not is_bitonal(mask):
            logger.error('Mask is not bitonal')
            raise KrakenInputException('Mask is not bitonal')
        mask = mask.convert('1')
        if mask.size != im.size:
            logger.error('Mask size {} doesn\'t match image size {}'.format(mask.size, im.size))
            raise KrakenInputException('Mask size {} doesn\'t match image size {}'.format(mask.size, im.size))
        logger.info('Masking enabled in segmenter.')
        mask = pil2array(mask)

    batch, channels, height, width = model.input
    transforms = dataset.generate_input_transforms(batch, height, width, channels, 0, valid_norm=False)
    res_tf = tf.Compose(transforms.transforms[:2])
    scal_im = res_tf(im).convert('L')

    with torch.no_grad():
        logger.debug('Running network forward pass')
        o = model.nn(transforms(im).unsqueeze(0))
    logger.debug('Upsampling network output')
    o = F.interpolate(o, size=scal_im.size[::-1])
    o = o.squeeze().numpy()
    logger.debug('Vectorizing network output')
    baselines = vectorize_lines(o)
    logger.debug('Polygonizing lines')
    lines = list(zip(baselines, calculate_polygonal_environment(scal_im, baselines)))
github mittagessen / kraken / kraken / linegen.py View on Github external
im (PIL.Image): Input image
        distort (float):
        sigma (float):
        eps (float):
        delta (float):

    Returns:
        PIL.Image in mode 'L'
    """
    w, h = im.size
    # XXX: determine correct output shape from transformation matrices instead
    # of guesstimating.
    logger.debug(u'Pasting source image into canvas')
    image = Image.new('L', (int(1.5*w), 4*h), 255)
    image.paste(im, (int((image.size[0] - w) / 2), int((image.size[1] - h) / 2)))
    line = pil2array(image.convert('L'))

    # shear in y direction with factor eps * randn(), scaling with 1 + eps *
    # randn() in x/y axis (all offset at d)
    logger.debug(u'Performing affine transformation')
    m = np.array([[1 + eps * np.random.randn(), 0.0], [eps * np.random.randn(), 1.0 + eps * np.random.randn()]])
    c = np.array([w/2.0, h/2])
    d = c - np.dot(m, c) + np.array([np.random.randn() * delta, np.random.randn() * delta])
    line = affine_transform(line, m, offset=d, order=1, mode='constant', cval=255)

    hs = gaussian_filter(np.random.randn(4*h, int(1.5*w)), sigma)
    ws = gaussian_filter(np.random.randn(4*h, int(1.5*w)), sigma)
    hs *= distort/np.amax(hs)
    ws *= distort/np.amax(ws)

    def _f(p):
        return (p[0] + hs[p[0], p[1]], p[1] + ws[p[0], p[1]])
github mittagessen / kraken / kraken / linegen.py View on Github external
For parameter meanings consult [1].

    Args:
        im (PIL.Image): Input image
        eta (float):
        alpha (float):
        beta (float):
        alpha_0 (float):
        beta_0 (float):

    Returns:
        PIL.Image in mode '1'
    """
    logger.debug(u'Inverting and normalizing input image')
    im = pil2array(im)
    im = np.amax(im)-im
    im = im*1.0/np.amax(im)

    logger.debug(u'Calculating foreground distance transform')
    fg_dist = distance_transform_cdt(1-im, metric='taxicab')
    logger.debug(u'Calculating flip to white probability')
    fg_prob = alpha_0 * np.exp(-alpha * (fg_dist**2)) + eta
    fg_prob[im == 1] = 0
    fg_flip = np.random.binomial(1, fg_prob)

    logger.debug(u'Calculating background distance transform')
    bg_dist = distance_transform_cdt(im, metric='taxicab')
    logger.debug(u'Calculating flip to black probability')
    bg_prob = beta_0 * np.exp(-beta * (bg_dist**2)) + eta
    bg_prob[im == 0] = 0
    bg_flip = np.random.binomial(1, bg_prob)
github mittagessen / kraken / kraken / binarization.py View on Github external
Returns:
        PIL.Image containing the binarized image

    Raises:
        KrakenInputException when trying to binarize an empty image.
    """
    im_str = get_im_str(im)
    logger.info('Binarizing {}'.format(im_str))
    if is_bitonal(im):
        logger.info('Skipping binarization because {} is bitonal.'.format(im_str))
        return im
    # convert to grayscale first
    logger.debug('Converting {} to grayscale'.format(im_str))
    im = im.convert('L')
    raw = pil2array(im)
    logger.debug('Scaling and normalizing')
    # rescale image to between -1 or 0 and 1
    raw = raw/np.float(np.iinfo(raw.dtype).max)
    # perform image normalization
    if np.amax(raw) == np.amin(raw):
        logger.warning('Trying to binarize empty image {}'.format(im_str))
        raise KrakenInputException('Image is empty')
    image = raw-np.amin(raw)
    image /= np.amax(image)

    logger.debug('Interpolation and percentile filtering')
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)
        m = interpolation.zoom(image, zoom)
        m = filters.percentile_filter(m, perc, size=(range, 2))
        m = filters.percentile_filter(m, perc, size=(2, range))
github mittagessen / kraken / kraken / lib / lineest.py View on Github external
def dewarp(normalizer: CenterNormalizer, im: Image.Image) -> Image.Image:
    """
    Straightens a line image with the help of a
    kraken.lib.lineest.CenterNormalizer.

    The normalizer is first given an inverted, max-scaled copy of the line
    through its ``measure`` method; the untouched pixel data is then passed
    to its ``normalize`` method, using the brightest pixel value of the
    input as the fill value.

    Args:
        normalizer (kraken.lib.lineest.CenterNormalizer): Line normalizer
                                                          used for measuring
                                                          and normalizing
        im (PIL.Image.Image): Line image that should be dewarped

    Returns:
        PIL.Image holding the dewarped line.
    """
    pixels = pil2array(im)
    peak = np.amax(pixels)
    # Flip intensities around the brightest value and divide by the new
    # maximum; only this copy is handed to the measuring step.
    inverted = peak - pixels
    inverted = inverted * 1.0 / np.amax(inverted)
    normalizer.measure(inverted)
    # Normalization runs on the original (non-inverted) data, filled with
    # the brightest input value.
    return array2pil(normalizer.normalize(pixels, cval=peak))
github mittagessen / kraken / kraken / pageseg.py View on Github external
angle = 0
        offset = (0, 0)
    elif text_direction == 'vertical-lr':
        angle = 270
        offset = (0, im.size[1])
    elif text_direction == 'vertical-rl':
        angle = 90
        offset = (im.size[0], 0)
    else:
        logger.error('Invalid text direction \'{}\''.format(text_direction))
        raise KrakenInputException('Invalid text direction {}'.format(text_direction))

    logger.debug('Rotating input image by {} degrees'.format(angle))
    im = im.rotate(angle, expand=True)

    a = pil2array(im)
    binary = np.array(a > 0.5*(np.amin(a) + np.amax(a)), 'i')
    binary = 1 - binary

    if not scale:
        scale = estimate_scale(binary)

    if no_hlines:
        binary = remove_hlines(binary, scale)
    # emptyish images will cause exceptions here.

    try:
        if mask:
            if mask.mode != '1' and not is_bitonal(mask):
                logger.error('Mask is not bitonal')
                raise KrakenInputException('Mask is not bitonal')
            mask = mask.convert('1')
github mittagessen / kraken / kraken / linegen.py View on Github external
dsigma (float):
        eps (float):
        delta (float):
        degradations (list): list returning 4-tuples corresponding to
                             the degradations argument of ocropus-linegen.

    Returns:
        PIL.Image in mode 'L'
    """
    w, h = im.size
    # XXX: determine correct output shape from transformation matrices instead
    # of guesstimating.
    logger.debug(u'Pasting source image into canvas')
    image = Image.new('L', (int(1.5*w), 4*h), 255)
    image.paste(im, (int((image.size[0] - w) / 2), int((image.size[1] - h) / 2)))
    a = pil2array(image.convert('L'))
    logger.debug(u'Selecting degradations')
    (sigma, ssigma, threshold, sthreshold) = degradations[np.random.choice(len(degradations))]
    sigma += (2 * np.random.rand() - 1) * ssigma
    threshold += (2 * np.random.rand() - 1) * sthreshold
    a = a * 1.0 / np.amax(a)
    if sigma > 0.0:
        logger.debug(u'Apply Gaussian filter')
        a = gaussian_filter(a, sigma)
    logger.debug(u'Adding noise')
    a += np.clip(np.random.randn(*a.shape) * 0.2, -0.25, 0.25)
    logger.debug(u'Perform affine transformation and resize')
    m = np.array([[1 + eps * np.random.randn(), 0.0], [eps * np.random.randn(), 1.0 + eps * np.random.randn()]])
    w, h = a.shape
    c = np.array([w / 2.0, h / 2])
    d = c - np.dot(m, c) + np.array([np.random.randn() * delta, np.random.randn() * delta])
    a = affine_transform(a, m, offset=d, order=1, mode='constant', cval=a[0, 0])
github mittagessen / kraken / kraken / pageseg.py View on Github external
if no_hlines:
        binary = remove_hlines(binary, scale)
    # emptyish images will cause exceptions here.

    try:
        if mask:
            if mask.mode != '1' and not is_bitonal(mask):
                logger.error('Mask is not bitonal')
                raise KrakenInputException('Mask is not bitonal')
            mask = mask.convert('1')
            if mask.size != im.size:
                logger.error('Mask size {} doesn\'t match image size {}'.format(mask.size, im.size))
                raise KrakenInputException('Mask size {} doesn\'t match image size {}'.format(mask.size, im.size))
            logger.info('Masking enabled in segmenter. Disabling column detection.')
            mask = mask.rotate(angle, expand=True)
            colseps = pil2array(mask)
        elif black_colseps:
            colseps, binary = compute_black_colseps(binary, scale, maxcolseps)
        else:
            colseps = compute_white_colseps(binary, scale, maxcolseps)
    except ValueError:
        logger.warning('Exception in column finder (probably empty image) for {}.'.format(im_str))
        return {'text_direction': text_direction, 'boxes':  []}

    bottom, top, boxmap = compute_gradmaps(binary, scale)
    seeds = compute_line_seeds(binary, bottom, top, colseps, scale)
    llabels = morph.propagate_labels(boxmap, seeds, conflict=0)
    spread = morph.spread_labels(seeds, maxdist=scale)
    llabels = np.where(llabels > 0, llabels, spread*binary)
    segmentation = llabels*binary

    lines = compute_lines(segmentation, scale)