How to use the kraken.lib.morph function in kraken

To help you get started, we’ve selected a few kraken examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mittagessen / kraken / kraken / pageseg.py View on Github external
Separators
    """
    logger.debug('Finding column separators')
    # find vertical whitespace by thresholding
    smoothed = gaussian_filter(1.0*binary, (scale, scale*0.5))
    smoothed = uniform_filter(smoothed, (5.0*scale, 1))
    thresh = (smoothed < np.amax(smoothed)*0.1)
    # find column edges by filtering
    grad = gaussian_filter(1.0*binary, (scale, scale*0.5), order=(0, 1))
    grad = uniform_filter(grad, (10.0*scale, 1))
    grad = (grad > 0.5*np.amax(grad))
    # combine edges and whitespace
    seps = np.minimum(thresh, maximum_filter(grad, (int(scale), int(5*scale))))
    seps = maximum_filter(seps, (int(2*scale), 1))
    # select only the biggest column separators
    seps = morph.select_regions(seps, sl.dim0, min=minheight*scale,
                                nbest=maxcolseps)
    return seps
github mittagessen / kraken / kraken / pageseg.py View on Github external
def remove_hlines(binary: np.array, scale: float, maxsize: int = 10) -> np.array:
    """
    Removes horizontal black lines that only interfere with page segmentation.

        Args:
            binary (numpy.array):
            scale (float):
            maxsize (int): maximum size of removed lines

        Returns:
            numpy.array containing the filtered image.

    """
    logger.debug('Filtering horizontal lines')
    labels, _ = morph.label(binary)
    objects = morph.find_objects(labels)
    for i, b in enumerate(objects):
        if sl.width(b) > maxsize*scale:
            labels[b][labels[b] == i+1] = 0
    return np.array(labels != 0, 'B')
github mittagessen / kraken / kraken / pageseg.py View on Github external
def binary_objects(binary: np.array) -> np.array:
    """
    Labels features in an array and segments them into objects.
    """
    labels, _ = morph.label(binary)
    objects = morph.find_objects(labels)
    return objects
github mittagessen / kraken / kraken / pageseg.py View on Github external
def remove_hlines(binary: np.array, scale: float, maxsize: int = 10) -> np.array:
    """
    Removes horizontal black lines that only interfere with page segmentation.

        Args:
            binary (numpy.array):
            scale (float):
            maxsize (int): maximum size of removed lines

        Returns:
            numpy.array containing the filtered image.

    """
    logger.debug('Filtering horizontal lines')
    labels, _ = morph.label(binary)
    objects = morph.find_objects(labels)
    for i, b in enumerate(objects):
        if sl.width(b) > maxsize*scale:
            labels[b][labels[b] == i+1] = 0
    return np.array(labels != 0, 'B')
github mittagessen / kraken / kraken / pageseg.py View on Github external
delta = max(3, int(scale/2))
    for x in range(bmarked.shape[1]):
        transitions = sorted([(y, 1) for y in find(bmarked[:, x])] +
                             [(y, 0) for y in find(tmarked[:, x])])[::-1]
        transitions += [(0, 0)]
        for l in range(len(transitions)-1):
            y0, s0 = transitions[l]
            if s0 == 0:
                continue
            seeds[y0-delta:y0, x] = 1
            y1, s1 = transitions[l+1]
            if s1 == 0 and (y0-y1) < 5*scale:
                seeds[y1:y0, x] = 1
    seeds = maximum_filter(seeds, (1, int(1+scale)))
    seeds = seeds * (1-colseps)
    seeds, _ = morph.label(seeds)
    return seeds
github mittagessen / kraken / kraken / pageseg.py View on Github external
def compute_separators_morph(binary: np.array, scale: float,
                             sepwiden: int = 10, maxcolseps: int = 2) -> np.array:
    """Finds vertical black lines corresponding to column separators."""
    logger.debug('Finding vertical black column lines')
    d0 = int(max(5, scale/4))
    d1 = int(max(5, scale)) + sepwiden
    thick = morph.r_dilation(binary, (d0, d1))
    vert = morph.rb_opening(thick, (10*scale, 1))
    vert = morph.r_erosion(vert, (d0//2, sepwiden))
    vert = morph.select_regions(vert, sl.dim1, min=3, nbest=2*maxcolseps)
    vert = morph.select_regions(vert, sl.dim0, min=20*scale, nbest=maxcolseps)
    return vert
github mittagessen / kraken / kraken / pageseg.py View on Github external
def compute_separators_morph(binary: np.array, scale: float,
                             sepwiden: int = 10, maxcolseps: int = 2) -> np.array:
    """Finds vertical black lines corresponding to column separators."""
    logger.debug('Finding vertical black column lines')
    d0 = int(max(5, scale/4))
    d1 = int(max(5, scale)) + sepwiden
    thick = morph.r_dilation(binary, (d0, d1))
    vert = morph.rb_opening(thick, (10*scale, 1))
    vert = morph.r_erosion(vert, (d0//2, sepwiden))
    vert = morph.select_regions(vert, sl.dim1, min=3, nbest=2*maxcolseps)
    vert = morph.select_regions(vert, sl.dim0, min=20*scale, nbest=maxcolseps)
    return vert
github mittagessen / kraken / kraken / pageseg.py View on Github external
def compute_separators_morph(binary: np.array, scale: float,
                             sepwiden: int = 10, maxcolseps: int = 2) -> np.array:
    """Finds vertical black lines corresponding to column separators."""
    logger.debug('Finding vertical black column lines')
    d0 = int(max(5, scale/4))
    d1 = int(max(5, scale)) + sepwiden
    thick = morph.r_dilation(binary, (d0, d1))
    vert = morph.rb_opening(thick, (10*scale, 1))
    vert = morph.r_erosion(vert, (d0//2, sepwiden))
    vert = morph.select_regions(vert, sl.dim1, min=3, nbest=2*maxcolseps)
    vert = morph.select_regions(vert, sl.dim0, min=20*scale, nbest=maxcolseps)
    return vert