Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _read_from_img(self, image_path, lang=None):
    """Run OCR on the image at *image_path* and return the sorted result.

    Args:
        image_path: Path of the image file to open.
        lang: Language forwarded unchanged to
            ``tesseract.image_to_string`` (``None`` by default).

    Returns:
        The object produced by ``tesseract.image_to_string`` for
        ``self._builder``, after ``.sort()`` has been called on it.
        (Presumably a list of boxes; the builder is configured outside
        this block -- confirm against the caller.)
    """
    # Open the image in a ``with`` block so the underlying file handle is
    # released as soon as OCR finishes instead of lingering until garbage
    # collection. Assumes ``Image`` is Pillow's ``Image`` module, whose
    # image objects are context managers -- TODO confirm.
    with Image.open(image_path) as img:
        boxes = tesseract.image_to_string(
            img,
            lang=lang,
            builder=self._builder,
        )
    boxes.sort()
    return boxes
# Excerpt of a directory-processing routine: every file in `path` whose
# extension is in VALIDITY is passed to tool.image_to_string, and the
# recognised text is handed to self.savefile().
# NOTE(review): the enclosing `def`, the initialisation of `count` and
# `directory_path`, and the original indentation are not part of this excerpt,
# so the exact loop boundaries cannot be confirmed from here.
other_files = 0
for f in os.listdir(path): #Iterate over every entry name in the `path` directory
ext = os.path.splitext(f)[1] #Extension part of the name, e.g. '.png' / '.jpg'
if ext.lower() not in VALIDITY: #Case-insensitive check against the accepted extensions
other_files += 1 #Count entries that are skipped because of their extension
continue
else:
count += 1
image_file_name = path + '/' + f #Full /dir/path/filename.extension
# Recognise the text in the image using the language configured on self.
txt = tool.image_to_string(
Im.open(image_file_name), lang=self.lang,
builder=pyocr.builders.TextBuilder()
)
#txt = txt.split()[:5]
initial = txt.replace('\a', ' ').replace('\b', ' ').replace('\f', ' ').replace('\n',' ').replace('\r', '').replace('\t',' ').replace('\v',' ') #Flatten to one line: control/whitespace escapes become spaces, '\r' is dropped
initial = initial[:60] #Keep only the first 60 characters (characters, not words)
print('Filename:' + initial + '\n')
# NOTE(review): 0o777 makes `path` readable, writable and executable for
# every user; confirm that such wide permissions are really required.
os.chmod(path, 0o777)
# presumably writes `txt` under `directory_path` using `initial` as the
# name -- savefile is defined elsewhere; verify against its implementation.
self.savefile(initial, txt, directory_path)
# Summary: number of files that were OCR-processed.
print(str(count) + (" file" if count == 1 else " files") + " processed")
if count + other_files == 0:
print("No files found") #Neither accepted nor skipped entries were seen
# Excerpt of a directory-processing routine: every file in `path` whose
# extension is in VALIDITY is passed to tool.image_to_string and then renamed
# to the first 60 characters of the recognised text plus its original extension.
# NOTE(review): the enclosing `def`, the initialisation of `count` and
# `other_files`, and the original indentation are not part of this excerpt,
# so the exact loop boundaries cannot be confirmed from here.
for f in os.listdir(path): #Iterate over every entry name in the `path` directory
ext = os.path.splitext(f)[1] #Extension part of the name, e.g. '.png' / '.jpg'
if ext.lower() not in VALIDITY: #Case-insensitive check against the accepted extensions
other_files += 1 #Count entries that are skipped because of their extension
#sys.stdout.write("Extension other than image is not supported. \n")
continue
else:
count += 1
image_file_name = path + '/' + f #Full /dir/path/filename.extension
# Recognise the text in the image using the language configured on self.
txt = tool.image_to_string(
Im.open(image_file_name), lang=self.lang,
builder=pyocr.builders.TextBuilder()
)
initial = txt.replace('\a', ' ').replace('\b', ' ').replace('\f', ' ').replace('\n',' ').replace('\r', '').replace('\t',' ').replace('\v',' ') #Flatten to one line: control/whitespace escapes become spaces, '\r' is dropped
initial = initial[:60] #Keep only the first 60 characters (characters, not words)
print('Filename:' + initial + '\n')
# NOTE(review): 0o777 makes `path` readable, writable and executable for
# every user; confirm that such wide permissions are really required.
os.chmod(path, 0o777)
# Rename the image after its recognised text, keeping the original extension.
# NOTE(review): `initial` is used unsanitised -- it may be empty, contain '/'
# or match the name produced for another image; confirm this is acceptable.
os.rename(image_file_name, path + '/' + initial + ext)
# Summary: number of files that were OCR-processed.
print(str(count) + (" file" if count == 1 else " files") + " processed")
if count + other_files == 0:
print("No files found") #Neither accepted nor skipped entries were seen
# NOTE(review): dangling `else` -- the `if` it pairs with and its own body are
# not part of this excerpt.
else :
def main(self, text_img_name):
    """Return the text recognised in the image file *text_img_name*.

    Args:
        text_img_name: Path of the image file to run OCR on.

    Returns:
        The value produced by ``tool.image_to_string`` with a
        ``pyocr.builders.TextBuilder`` and the language in ``self.lang``.
    """
    # Open the image in a ``with`` block so the underlying file handle is
    # released as soon as OCR finishes instead of lingering until garbage
    # collection. Assumes ``Im`` is Pillow's ``Image`` module, whose image
    # objects are context managers -- TODO confirm.
    with Im.open(text_img_name) as img:
        txt = tool.image_to_string(
            img,
            lang=self.lang,
            builder=pyocr.builders.TextBuilder(),
        )
    return txt