# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
threshold (float):
zoom (float): Zoom for background page estimation
escale (float): Scale for estimating a mask over the text region
border (float): Ignore this much of the border
perc (int): Percentage for filters
range (int): Range for filters
low (int): Percentile for black estimation
high (int): Percentile for white estimation
Returns:
PIL.Image containing the binarized image
Raises:
KrakenInputException when trying to binarize an empty image.
"""
im_str = get_im_str(im)
logger.info('Binarizing {}'.format(im_str))
if is_bitonal(im):
logger.info('Skipping binarization because {} is bitonal.'.format(im_str))
return im
# convert to grayscale first
logger.debug('Converting {} to grayscale'.format(im_str))
im = im.convert('L')
raw = pil2array(im)
logger.debug('Scaling and normalizing')
# rescale image to between -1 or 0 and 1
raw = raw/np.float(np.iinfo(raw.dtype).max)
# perform image normalization
if np.amax(raw) == np.amin(raw):
logger.warning('Trying to binarize empty image {}'.format(im_str))
raise KrakenInputException('Image is empty')
image = raw-np.amin(raw)
2-tuple, uses (padding_left, padding_right).
mask (PIL.Image): A bi-level mask image of the same size as `im` where
0-valued regions are ignored for segmentation
purposes. Disables column detection.
skip_order (bool): Skips reading order determination of lines.
Returns:
{'text_direction': '$dir', 'boxes': [(x1, y1, x2, y2),...]}: A
dictionary containing the text direction and a list of reading order
sorted bounding boxes under the key 'boxes'.
Raises:
KrakenInputException if the input image is not binarized or the text
direction is invalid.
"""
# NOTE(review): fragment of kraken's box segmenter; the enclosing `def`
# and the remainder of the body are outside this chunk.
im_str = get_im_str(im)
logger.info('Segmenting {}'.format(im_str))
# segmentation requires a binarized input image
if im.mode != '1' and not is_bitonal(im):
    logger.error('Image {} is not bi-level'.format(im_str))
    raise KrakenInputException('Image {} is not bi-level'.format(im_str))
# rotate input image for vertical lines; `offset` records the translation
# needed to map coordinates back into the original image frame
if text_direction.startswith('horizontal'):
    angle = 0
    offset = (0, 0)
elif text_direction == 'vertical-lr':
    angle = 270
    offset = (0, im.size[1])
elif text_direction == 'vertical-rl':
    angle = 90
    offset = (im.size[0], 0)
# NOTE(review): no `else` branch is visible here — presumably an invalid
# text_direction raises KrakenInputException further down; cannot confirm
# from this fragment.
'horizontal-lr/rl/vertical-lr/rl'.
model (str): Location of the script classification model or None for default.
valid_scripts (list): List of valid scripts.
Returns:
{'script_detection': True, 'text_direction': '$dir', 'boxes':
[[(script, (x1, y1, x2, y2)),...]]}: A dictionary containing the text
direction and a list of lists of reading order sorted bounding boxes
under the key 'boxes' with each list containing the script segmentation
of a single line. Script is an ISO 15924 4-character identifier.
Raises:
KrakenInvalidModelException if no clstm module is available.
"""
# NOTE(review): fragment of kraken's script-detection routine. The
# unconditional raise below deliberately disables the feature, so every
# statement after it is dead code kept for later reinstatement.
raise NotImplementedError('Temporarily unavailable. Please open a github ticket if you want this fixed sooner.')
im_str = get_im_str(im)
logger.info(u'Detecting scripts with {} in {} lines on {}'.format(model, len(bounds['boxes']), im_str))
logger.debug(u'Loading detection model {}'.format(model))
rnn = models.load_any(model)
# load numerical to 4 char identifier map
logger.debug(u'Loading label to identifier map')
with pkg_resources.resource_stream(__name__, 'iso15924.json') as fp:
    n2s = json.load(fp)
# convert allowed scripts to labels: each script label is mapped into the
# Unicode supplementary private use area (plane 15, base 0xF0000)
val_scripts = []
if valid_scripts:
    logger.debug(u'Converting allowed scripts list {}'.format(valid_scripts))
    for k, v in n2s.items():
        if v in valid_scripts:
            val_scripts.append(chr(int(k) + 0xF0000))
else:
    valid_scripts = []
def add_page(self, im, segmentation=None, records=None):
    """
    Adds an image to the transcription interface, optionally filling in
    information from a list of ocr_record objects.

    Args:
        im (PIL.Image): Input image
        segmentation (dict): Output of the segment method.
        records (list): A list of ocr_record objects.
    """
    im_str = get_im_str(im)
    logger.info(u'Adding page {} with {} lines'.format(im_str, len(segmentation) if segmentation else len(records)))
    page = {}
    # render the page to an in-memory PNG so it can be embedded as a
    # base64 data URI below
    fd = BytesIO()
    im.save(fd, format='png', optimize=True)
    page['index'] = self.page_idx
    self.page_idx += 1
    logger.debug(u'Base64 encoding image')
    page['img'] = 'data:image/png;base64,' + base64.b64encode(fd.getvalue()).decode('ascii')
    page['lines'] = []
    if records:
        logger.debug(u'Adding records.')
        self.text_direction = segmentation['text_direction']
        # box coordinates are converted to percentages of the page size
        # NOTE(review): this chunk is cut off inside the append call; the
        # rest of the dict literal and of the method is not visible here.
        for record, bbox in zip(records, segmentation['boxes']):
            page['lines'].append({'index': self.line_idx, 'text': record.prediction,
                                  'left': 100*int(bbox[0]) / im.size[0],
                                  'top': 100*int(bbox[1]) / im.size[1],
{'text_direction': '$dir',
'type': 'baseline',
'lines': [
{'baseline': [[x0, y0], [x1, y1], ..., [x_n, y_n]], 'boundary': [[x0, y0], [x1, y1], ..., [x_m, y_m]]},
{'baseline': [[x0, ...]], 'boundary': [[x0, ...]]}
]
}: A dictionary containing the text direction and under the key 'lines'
a list of reading order sorted baselines (polylines) and their
respective polygonal boundaries. The last and first point of each
boundary polygon is connected.
Raises:
KrakenInputException if the input image is not binarized or the text
direction is invalid.
"""
# NOTE(review): fragment of kraken's baseline (neural) segmenter; the
# enclosing `def` and the remainder of the body are outside this chunk.
im_str = get_im_str(im)
logger.info('Segmenting {}'.format(im_str))
# `model` enters as a path/location and is rebound to the loaded network
model = vgsl.TorchVGSLModel.load_model(model)
model.eval()
if mask:
    # the mask must be bi-level and congruent with the input image
    if mask.mode != '1' and not is_bitonal(mask):
        logger.error('Mask is not bitonal')
        raise KrakenInputException('Mask is not bitonal')
    mask = mask.convert('1')
    if mask.size != im.size:
        logger.error('Mask size {} doesn\'t match image size {}'.format(mask.size, im.size))
        raise KrakenInputException('Mask size {} doesn\'t match image size {}'.format(mask.size, im.size))
    logger.info('Masking enabled in segmenter.')
    mask = pil2array(mask)
# expected network input geometry, from the VGSL spec of the model
batch, channels, height, width = model.input
im (PIL.Image.Image): Image to extract text from
bounds (dict): A dictionary containing a 'boxes' entry with a list of
coordinates (x0, y0, x1, y1) of a text line in the image
and an entry 'text_direction' containing
'horizontal-lr/rl/vertical-lr/rl'.
pad (int): Extra blank padding to the left and right of text line.
Auto-disabled when expected network inputs are incompatible
with padding.
bidi_reordering (bool): Reorder classes in the ocr_record according to
the Unicode bidirectional algorithm for correct
display.
Yields:
An ocr_record containing the recognized text, absolute character
positions, and confidence values for each character.
"""
# NOTE(review): fragment of kraken's single-model recognizer generator;
# the enclosing `def` and the rest of the loop body are outside this chunk.
im_str = get_im_str(im)
logger.info('Running recognizer on {} with {} lines'.format(im_str, len(bounds['boxes'])))
logger.debug('Loading line transform')
batch, channels, height, width = network.nn.input
ts = generate_input_transforms(batch, height, width, channels, pad)
for box, coords in extract_boxes(im, bounds):
    # check if boxes are non-zero in any dimension
    # NOTE(review): `sum(coords[::2]) == 0` only catches boxes whose x
    # coordinates sum to zero; a degenerate box with x0 == x1 > 0 would
    # pass this check — confirm intent against the full function.
    if sum(coords[::2]) == 0 or coords[3] - coords[1] == 0:
        logger.warning('bbox {} with zero dimension. Emitting empty record.'.format(coords))
        # degenerate line: emit an empty record and move on
        yield ocr_record('', [], [])
        continue
    # try conversion into tensor
    try:
        line = ts(box)
    except Exception:
        # transform failure also yields an empty record
        # NOTE(review): chunk ends here; a `continue` presumably follows
        # but is not visible.
        yield ocr_record('', [], [])
'text_direction' containing
'horizontal-lr/rl/vertical-lr/rl'.
pad (int): Extra blank padding to the left and right of text line
bidi_reordering (bool): Reorder classes in the ocr_record according to
the Unicode bidirectional algorithm for correct
display.
script_ignore (list): List of scripts to ignore during recognition
Yields:
An ocr_record containing the recognized text, absolute character
positions, and confidence values for each character.
Raises:
KrakenInputException if the mapping between segmentation scripts and
networks is incomplete.
"""
# NOTE(review): fragment of kraken's multi-model recognizer; the enclosing
# `def` is outside this chunk and the final statement is cut mid-expression.
im_str = get_im_str(im)
logger.info('Running {} multi-script recognizers on {} with {} lines'.format(len(nets), im_str, len(bounds['boxes'])))
# fail early if any script in the segmentation lacks a recognition model
miss = [x[0] for x in bounds['boxes'] if not nets.get(x[0])]
if miss:
    raise KrakenInputException('Missing models for scripts {}'.format(miss))
# build dictionary for line preprocessing: one input transform per script,
# derived from each network's expected input geometry
ts = {}
for script, network in nets.items():
    logger.debug('Loading line transforms for {}'.format(script))
    batch, channels, height, width = network.nn.input
    ts[script] = generate_input_transforms(batch, height, width, channels, pad)
for line in bounds['boxes']:
    # accumulate per-script partial results into one record per line
    rec = ocr_record('', [], [])
    # NOTE(review): chunk is truncated inside this zip(...) expression.
    for script, (box, coords) in zip(map(lambda x: x[0], line),