How to use the kraken.lib.util.get_im_str function in kraken

To help you get started, we’ve selected a few kraken examples based on popular ways get_im_str is used in public projects.

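All of the snippets below come from the kraken OCR library itself. Before diving into them, here is a minimal sketch of calling get_im_str directly: it simply returns a human-readable identifier for a PIL image (typically its filename) so that the functions below can reference the image in log messages. The file name page.png is a placeholder.

from PIL import Image
from kraken.lib.util import get_im_str

im = Image.open('page.png')   # placeholder path; any image file works
print(get_im_str(im))         # e.g. 'page.png', or a PIL object repr for in-memory images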

github mittagessen / kraken / kraken / binarization.py
        threshold (float):
        zoom (float): Zoom for background page estimation
        escale (float): Scale for estimating a mask over the text region
        border (float): Ignore this much of the border
        perc (int): Percentage for filters
        range (int): Range for filters
        low (int): Percentile for black estimation
        high (int): Percentile for white estimation

    Returns:
        PIL.Image containing the binarized image

    Raises:
        KrakenInputException when trying to binarize an empty image.
    """
    im_str = get_im_str(im)
    logger.info('Binarizing {}'.format(im_str))
    if is_bitonal(im):
        logger.info('Skipping binarization because {} is bitonal.'.format(im_str))
        return im
    # convert to grayscale first
    logger.debug('Converting {} to grayscale'.format(im_str))
    im = im.convert('L')
    raw = pil2array(im)
    logger.debug('Scaling and normalizing')
    # rescale image to between -1 or 0 and 1
    raw = raw/float(np.iinfo(raw.dtype).max)
    # perform image normalization
    if np.amax(raw) == np.amin(raw):
        logger.warning('Trying to binarize empty image {}'.format(im_str))
        raise KrakenInputException('Image is empty')
    image = raw-np.amin(raw)
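
The excerpt above is from kraken.binarization.nlbin, which logs the image via get_im_str and then binarizes it. A minimal usage sketch with default parameters; the input file page.png is a placeholder:

from PIL import Image
from kraken.binarization import nlbin

im = Image.open('page.png')       # placeholder path
bw_im = nlbin(im, threshold=0.5)  # returns a bi-level PIL.Image
bw_im.save('page.bw.png')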
github mittagessen / kraken / kraken / pageseg.py
                            2-tuple, uses (padding_left, padding_right).
        mask (PIL.Image): A bi-level mask image of the same size as `im` where
                          0-valued regions are ignored for segmentation
                          purposes. Disables column detection.
        skip_order (bool): Skips reading order determination of lines.

    Returns:
        {'text_direction': '$dir', 'boxes': [(x1, y1, x2, y2),...]}: A
        dictionary containing the text direction and a list of reading order
        sorted bounding boxes under the key 'boxes'.

    Raises:
        KrakenInputException if the input image is not binarized or the text
        direction is invalid.
    """
    im_str = get_im_str(im)
    logger.info('Segmenting {}'.format(im_str))

    if im.mode != '1' and not is_bitonal(im):
        logger.error('Image {} is not bi-level'.format(im_str))
        raise KrakenInputException('Image {} is not bi-level'.format(im_str))

    # rotate input image for vertical lines
    if text_direction.startswith('horizontal'):
        angle = 0
        offset = (0, 0)
    elif text_direction == 'vertical-lr':
        angle = 270
        offset = (0, im.size[1])
    elif text_direction == 'vertical-rl':
        angle = 90
        offset = (im.size[0], 0)
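
This excerpt is from kraken.pageseg.segment, the legacy box segmenter; note the bitonal check right after the get_im_str logging call. A minimal sketch of running it on a binarized page (paths are placeholders):

from PIL import Image
from kraken.binarization import nlbin
from kraken.pageseg import segment

im = nlbin(Image.open('page.png'))                 # segment() expects a bi-level image
seg = segment(im, text_direction='horizontal-lr')
for box in seg['boxes']:                           # (x1, y1, x2, y2) in reading order
    print(box)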
github mittagessen / kraken / kraken / pageseg.py
                       'horizontal-lr/rl/vertical-lr/rl'.
        model (str): Location of the script classification model or None for default.
        valid_scripts (list): List of valid scripts.

    Returns:
        {'script_detection': True, 'text_direction': '$dir', 'boxes':
        [[(script, (x1, y1, x2, y2)),...]]}: A dictionary containing the text
        direction and a list of lists of reading order sorted bounding boxes
        under the key 'boxes' with each list containing the script segmentation
        of a single line. Script is an ISO15924 4 character identifier.

    Raises:
        KrakenInvalidModelException if no clstm module is available.
    """
    raise NotImplementedError('Temporarily unavailable. Please open a github ticket if you want this fixed sooner.')
    im_str = get_im_str(im)
    logger.info(u'Detecting scripts with {} in {} lines on {}'.format(model, len(bounds['boxes']), im_str))
    logger.debug(u'Loading detection model {}'.format(model))
    rnn = models.load_any(model)
    # load numerical to 4 char identifier map
    logger.debug(u'Loading label to identifier map')
    with pkg_resources.resource_stream(__name__, 'iso15924.json') as fp:
        n2s = json.load(fp)
    # convert allowed scripts to labels
    val_scripts = []
    if valid_scripts:
        logger.debug(u'Converting allowed scripts list {}'.format(valid_scripts))
        for k, v in n2s.items():
            if v in valid_scripts:
                val_scripts.append(chr(int(k) + 0xF0000))
    else:
        valid_scripts = []
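
Script detection is disabled in the version shown, as the NotImplementedError at the top of the body indicates, but the label handling it performs is still worth illustrating: allowed ISO 15924 script identifiers are mapped to Unicode private-use codepoints by adding 0xF0000 to their numeric code. A toy sketch of that conversion with a hand-written two-entry table (the real table is loaded from the iso15924.json shipped with kraken):

# toy numeric-label -> ISO 15924 identifier table (illustrative subset only)
n2s = {'215': 'Latn', '160': 'Arab'}
valid_scripts = ['Latn']

# allowed scripts become private-use-area characters understood by the classifier
val_scripts = [chr(int(k) + 0xF0000) for k, v in n2s.items() if v in valid_scripts]
print(val_scripts)  # ['\U000f00d7'] for numeric label 215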
github mittagessen / kraken / kraken / transcribe.py
    def add_page(self, im, segmentation=None, records=None):
        """
        Adds an image to the transcription interface, optionally filling in
        information from a list of ocr_record objects.

        Args:
            im (PIL.Image): Input image
            segmentation (dict): Output of the segment method.
            records (list): A list of ocr_record objects.
        """
        im_str = get_im_str(im)
        logger.info(u'Adding page {} with {} lines'.format(im_str, len(segmentation) if segmentation else len(records)))
        page = {}
        fd = BytesIO()
        im.save(fd, format='png', optimize=True)
        page['index'] = self.page_idx
        self.page_idx += 1
        logger.debug(u'Base64 encoding image')
        page['img'] = 'data:image/png;base64,' + base64.b64encode(fd.getvalue()).decode('ascii')
        page['lines'] = []
        if records:
            logger.debug(u'Adding records.')
            self.text_direction = segmentation['text_direction']
            for record, bbox in zip(records, segmentation['boxes']):
                page['lines'].append({'index': self.line_idx, 'text': record.prediction,
                                      'left': 100*int(bbox[0]) / im.size[0],
                                      'top': 100*int(bbox[1]) / im.size[1],
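
add_page belongs to kraken.transcribe.TranscriptionInterface; it base64-encodes the page image and stores line geometry for the HTML transcription environment. A minimal sketch of feeding it the output of the legacy segmenter and writing the result, assuming the file names used here are placeholders:

from PIL import Image
from kraken.binarization import nlbin
from kraken.pageseg import segment
from kraken.transcribe import TranscriptionInterface

im = nlbin(Image.open('page.png'))                   # placeholder path
seg = segment(im, text_direction='horizontal-lr')

ti = TranscriptionInterface()
ti.add_page(im, segmentation=seg)                    # no records: empty transcription fields
with open('transcription.html', 'wb') as fp:
    ti.write(fp)                                     # serializes the transcription environment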
github mittagessen / kraken / kraken / blla.py
        {'text_direction': '$dir',
         'type': 'baseline',
         'lines': [
            {'baseline': [[x0, y0], [x1, y1], ..., [x_n, y_n]], 'boundary': [[x0, y0, x1, y1], ... [x_m, y_m]]},
            {'baseline': [[x0, ...]], 'boundary': [[x0, ...]]}
          ]
        }: A dictionary containing the text direction and under the key 'lines'
        a list of reading order sorted baselines (polylines) and their
        respective polygonal boundaries. The last and first point of each
        boundary polygon is connected.

    Raises:
        KrakenInputException if the input image is not binarized or the text
        direction is invalid.
    """
    im_str = get_im_str(im)
    logger.info('Segmenting {}'.format(im_str))

    model = vgsl.TorchVGSLModel.load_model(model)
    model.eval()
    if mask:
        if mask.mode != '1' and not is_bitonal(mask):
            logger.error('Mask is not bitonal')
            raise KrakenInputException('Mask is not bitonal')
        mask = mask.convert('1')
        if mask.size != im.size:
            logger.error('Mask size {} doesn\'t match image size {}'.format(mask.size, im.size))
            raise KrakenInputException('Mask size {} doesn\'t match image size {}'.format(mask.size, im.size))
        logger.info('Masking enabled in segmenter.')
        mask = pil2array(mask)

    batch, channels, height, width = model.input
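
This excerpt is from the trainable baseline segmenter in kraken.blla, which accepts grayscale or color input and an optional bi-level mask. A minimal sketch of calling it; in the version excerpted here, model is a path to a VGSL segmentation model file, and both page.png and blla.mlmodel are placeholder names:

from PIL import Image
from kraken import blla

im = Image.open('page.png')                            # placeholder path
baseline_seg = blla.segment(im, model='blla.mlmodel')  # placeholder model path
for line in baseline_seg['lines']:
    print(line['baseline'], line['boundary'])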
github mittagessen / kraken / kraken / rpred.py
        im (PIL.Image.Image): Image to extract text from
        bounds (dict): A dictionary containing a 'boxes' entry with a list of
                       coordinates (x0, y0, x1, y1) of a text line in the image
                       and an entry 'text_direction' containing
                       'horizontal-lr/rl/vertical-lr/rl'.
        pad (int): Extra blank padding to the left and right of text line.
                   Auto-disabled when expected network inputs are incompatible
                   with padding.
        bidi_reordering (bool): Reorder classes in the ocr_record according to
                                the Unicode bidirectional algorithm for correct
                                display.
    Yields:
        An ocr_record containing the recognized text, absolute character
        positions, and confidence values for each character.
    """
    im_str = get_im_str(im)
    logger.info('Running recognizer on {} with {} lines'.format(im_str, len(bounds['boxes'])))
    logger.debug('Loading line transform')
    batch, channels, height, width = network.nn.input
    ts = generate_input_transforms(batch, height, width, channels, pad)

    for box, coords in extract_boxes(im, bounds):
        # check if boxes are non-zero in any dimension
        if sum(coords[::2]) == 0 or coords[3] - coords[1] == 0:
            logger.warning('bbox {} with zero dimension. Emitting empty record.'.format(coords))
            yield ocr_record('', [], [])
            continue
        # try conversion into tensor
        try:
            line = ts(box)
        except Exception:
            yield ocr_record('', [], [])
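
This excerpt is from kraken.rpred.rpred, the single-model recognizer generator. A minimal sketch of running it over the legacy segmenter's output; the recognition model path en_best.mlmodel and the image path are placeholders:

from PIL import Image
from kraken.binarization import nlbin
from kraken.pageseg import segment
from kraken.lib import models
from kraken.rpred import rpred

im = nlbin(Image.open('page.png'))                   # placeholder path
bounds = segment(im, text_direction='horizontal-lr')
net = models.load_any('en_best.mlmodel')             # placeholder model path
for record in rpred(net, im, bounds):                # yields ocr_record objects lazily
    print(record.prediction)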
github mittagessen / kraken / kraken / rpred.py
                        'text_direction' containing
                        'horizontal-lr/rl/vertical-lr/rl'.
        pad (int): Extra blank padding to the left and right of text line
        bidi_reordering (bool): Reorder classes in the ocr_record according to
                                the Unicode bidirectional algorithm for correct
                                display.
        script_ignore (list): List of scripts to ignore during recognition
    Yields:
        An ocr_record containing the recognized text, absolute character
        positions, and confidence values for each character.

    Raises:
        KrakenInputException if the mapping between segmentation scripts and
        networks is incomplete.
    """
    im_str = get_im_str(im)
    logger.info('Running {} multi-script recognizers on {} with {} lines'.format(len(nets), im_str, len(bounds['boxes'])))

    miss = [x[0] for x in bounds['boxes'] if not nets.get(x[0])]
    if miss:
        raise KrakenInputException('Missing models for scripts {}'.format(miss))

    # build dictionary for line preprocessing
    ts = {}
    for script, network in nets.items():
        logger.debug('Loading line transforms for {}'.format(script))
        batch, channels, height, width = network.nn.input
        ts[script] = generate_input_transforms(batch, height, width, channels, pad)

    for line in bounds['boxes']:
        rec = ocr_record('', [], [])
        for script, (box, coords) in zip(map(lambda x: x[0], line),
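
This final excerpt is from kraken.rpred.mm_rpred, the multi-script counterpart of rpred: it takes one recognition model per script and expects 'boxes' to hold per-line lists of (script, box) tuples produced by a script detection step. A structural sketch only; the model paths, the 'Latn'/'Arab' keys, and the hand-written bounds dictionary are all placeholders:

from PIL import Image
from kraken.lib import models
from kraken.rpred import mm_rpred

im = Image.open('page.bw.png')                        # binarized page, placeholder path
nets = {'Latn': models.load_any('latin.mlmodel'),     # placeholder per-script models
        'Arab': models.load_any('arabic.mlmodel')}
bounds = {'text_direction': 'horizontal-lr',
          'script_detection': True,
          'boxes': [[('Latn', (10, 10, 500, 50))]]}   # one line, one Latin segment
for record in mm_rpred(nets, im, bounds):             # one ocr_record per line
    print(record.prediction)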