How to use the pyocr.tesseract.image_to_string function in pyocr

To help you get started, we’ve selected a few pyocr examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github openpaperwork / pyocr / tests / tests_base.py View on Github external
def _read_from_img(self, image_path, lang=None):
    """Run Tesseract on the image at *image_path* and return the sorted result.

    Args:
        image_path: Path of the image file to open.
        lang: Language forwarded unchanged to
            ``tesseract.image_to_string`` (``None`` by default).

    Returns:
        The value produced by ``tesseract.image_to_string`` for
        ``self._builder``, after an in-place ``.sort()``.
    """
    # Open the image in a ``with`` block so the underlying file handle is
    # released as soon as OCR is done, even if Tesseract raises; the
    # original left closing to garbage collection.
    with Image.open(image_path) as img:
        boxes = tesseract.image_to_string(
            img,
            lang=lang,
            builder=self._builder,
        )
    boxes.sort()

    return boxes
github aryaminus / memento / mementor / ocr_save.py View on Github external
# Excerpt from the middle of a method: `count`, `path`, `tool`, `VALIDITY`
# and `directory_path` are set up outside the lines shown here.
other_files = 0

        for f in os.listdir(path):  # every entry name in the `path` directory

            ext = os.path.splitext(f)[1]  # extension including the dot, e.g. ".png"

            if ext.lower() not in VALIDITY:  # case-insensitive extension check
                other_files += 1  # count entries that are skipped
                continue

            else:

                count += 1
                image_file_name = path + '/' + f  # <path>/<entry name>

                # OCR the image into text using the plain-text builder.
                # NOTE(review): the image returned by Im.open() is never
                # explicitly closed here.
                txt = tool.image_to_string(
                    Im.open(image_file_name), lang=self.lang,
                    builder=pyocr.builders.TextBuilder()
                )
                
                # Flatten control characters: \a \b \f \n \t \v become a
                # space, while \r is dropped entirely.
                initial = txt.replace('\a', ' ').replace('\b', ' ').replace('\f', ' ').replace('\n',' ').replace('\r', '').replace('\t',' ').replace('\v',' ')
                initial = initial[:60]  # keep only the first 60 characters (not words)
                print('Filename:' + initial + '\n')

                # Sets mode 0o777 on the directory itself, on every iteration.
                # NOTE(review): world-writable is very permissive - confirm
                # this is intended.
                os.chmod(path, 0o777)
                # savefile is defined elsewhere; presumably it writes `txt`
                # under a name derived from `initial` - verify against its
                # definition.
                self.savefile(initial, txt, directory_path)

                print(str(count) + (" file" if count == 1 else " files") + " processed")

        if count + other_files == 0:
            print("No files found")  # neither matching nor skipped entries were seen
github aryaminus / memento / mementor / ocr_rename.py View on Github external
# Excerpt from the middle of a method: `count`, `other_files`, `path`, `tool`
# and `VALIDITY` are set up outside the lines shown here.
for f in os.listdir(path):  # every entry name in the `path` directory

            ext = os.path.splitext(f)[1]  # extension including the dot, e.g. ".png"

            if ext.lower() not in VALIDITY:  # case-insensitive extension check
                other_files += 1  # count entries that are skipped
                continue

            else:

                count += 1

                image_file_name = path + '/' + f  # <path>/<entry name>

                # OCR the image into text using the plain-text builder.
                # NOTE(review): the image returned by Im.open() is never
                # explicitly closed here.
                txt = tool.image_to_string(
                    Im.open(image_file_name), lang=self.lang,
                    builder=pyocr.builders.TextBuilder()
                )
                
                # Flatten control characters: \a \b \f \n \t \v become a
                # space, while \r is dropped entirely.
                initial = txt.replace('\a', ' ').replace('\b', ' ').replace('\f', ' ').replace('\n',' ').replace('\r', '').replace('\t',' ').replace('\v',' ')
                initial = initial[:60]  # keep only the first 60 characters (not words)
                print('Filename:' + initial + '\n')

                # Sets mode 0o777 on the directory itself, on every iteration.
                # NOTE(review): world-writable is very permissive - confirm
                # this is intended.
                os.chmod(path, 0o777)
                # Rename the image to the OCR-derived text plus its original
                # extension.
                # NOTE(review): `initial` is raw OCR output - it may be empty,
                # contain '/' or collide with an existing name; none of that
                # is handled in the lines shown.
                os.rename(image_file_name, path + '/' + initial + ext)

                print(str(count) + (" file" if count == 1 else " files") + " processed")

        if count + other_files == 0:
            print("No files found")  # neither matching nor skipped entries were seen
        else :  # the body of this branch is not part of the excerpt
github aryaminus / memento / mementor / image_ocr.py View on Github external
def main(self, text_img_name):
    """Return the text that OCR extracts from the image *text_img_name*.

    Args:
        text_img_name: Path of the image file to open.

    Returns:
        The value produced by ``tool.image_to_string`` when called with
        ``self.lang`` and a ``pyocr.builders.TextBuilder``.
    """
    # Open the image in a ``with`` block so its file handle is released as
    # soon as OCR is done, even if the OCR call raises; the original never
    # closed it explicitly.
    with Im.open(text_img_name) as img:
        txt = tool.image_to_string(
            img,
            lang=self.lang,
            builder=pyocr.builders.TextBuilder(),
        )

    return txt