Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def clean_input(text):
    """Return *text* with symbols removed and every word spell-corrected.

    Each character that is not a regex word character becomes a space, the
    result is split on whitespace, every token is passed through ``spell``,
    and the corrected tokens are joined with single spaces.
    """
    # Symbols become spaces first so they act as token separators.
    tokens = re.sub(r'[^\w]', ' ', text).split()
    corrected = [spell(token) for token in tokens]
    return " ".join(corrected)
# write the grayscale image to disk as a temporary file so we can
# apply OCR to it
# The file name is derived from the current process id, so every call made
# from the same process reuses the same path.
filename = "scraper/{}.png".format(os.getpid())
cv2.imwrite(filename, gray)
# load the image as a PIL/Pillow image, apply OCR, and then delete
# the temporary file
# NOTE(review): if image_to_string raises, the os.remove below is skipped and
# the temporary file stays on disk — confirm whether cleanup should be
# guaranteed.
text = pytesseract.image_to_string(Image.open(filename))
os.remove(filename)
# split the OCR output into whitespace-separated tokens
text = text.strip().split()
# remove '.', '!' and ':' from each token, then spell-correct the token
chars_to_remove = ['.', '!', ':']
extracted = list()
for t in text:
for c in chars_to_remove:
t = t.replace(c, '')
extracted.append(spell(t))
# the corrected tokens are returned as one space-separated string
return(' '.join(extracted))
# NOTE(review): BaseException also covers KeyboardInterrupt and SystemExit;
# the error is only printed and nothing is re-raised here — confirm that
# swallowing it is intended.
except BaseException as e:
print(e)
def correct(self, phrase):
    """Return *phrase* with every word spell-corrected and upper-cased.

    The phrase is split on whitespace, each word goes through ``spell`` and
    is converted to upper case, and the results are joined with single
    spaces.
    """
    return ' '.join(spell(token).upper() for token in phrase.split())
def spell_check(input_question):
    """Spell-correct selected words of a question and return the rebuilt text.

    The question is split on whitespace. A word is replaced by
    ``spell(word)`` only when the en_US enchant dictionary rejects it and it
    does not start with a regex word character; every other word is kept
    unchanged. The words are then joined with single spaces.

    The previous implementation iterated over the words themselves but used
    each one as a list index, which raised ``TypeError`` on the first word.
    """
    leading_word_char = re.compile(r"\w")
    en_dict = enchant.Dict("en_US")

    checked_words = []
    for word in input_question.split():
        # NOTE(review): the "match(...) is None" test restricts correction to
        # tokens that start with a non-word character. That condition is
        # preserved from the original; confirm it is not inverted.
        needs_fix = (not en_dict.check(word)
                     and leading_word_char.match(word) is None)
        checked_words.append(spell(word) if needs_fix else word)
    return " ".join(checked_words)
from flask import Flask, request, redirect, url_for
from flask import send_from_directory
from werkzeug.utils import secure_filename
import autocorrect
import receiptparser
# Store pics temporarily on api server
OCR_SCRIPT = './ocr.sh'
UPLOAD_FOLDER = 'uploads/'
STATIC_FOLDER = '../web-client/'
# File extensions that allowed_file() accepts.
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'}

# Flask application; the upload folder is exposed through app.config.
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# Initialise the autocorrect module with the word list file.
autocorrect.init('wordlist.txt')
def allowed_file(filename):
    """Return True when *filename* has an extension in ALLOWED_EXTENSIONS.

    The extension is the text after the last '.' and is compared
    case-insensitively, so "photo.JPG" is accepted just like "photo.jpg"
    (the allowed set holds lower-case entries). Names without a '.' are
    rejected.
    """
    _, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS
def optical_character_recognition(imagepath):
""" Does OCR on an image and returns tuple:
(raw text, autocorrected text, parsed receipt data) """
# NOTE(review): the docstring promises a 3-tuple, but no return statement is
# visible in this part of the body — confirm against the full function.
# Process image with ImageMagick
# The processed copy is always written to the fixed name 'temp.png' inside the
# configured upload folder.
# NOTE(review): concurrent calls would share that path — confirm this
# function is never run in parallel.
tempimagepath = os.path.join(app.config['UPLOAD_FOLDER'], 'temp.png')
# Launch `convert` on imagepath (resize to 600x800, blur 2, -lat 8x8-2%) with
# tempimagepath as the output. The arguments are passed as a list, so no shell
# is involved. Popen starts the process without waiting for it to finish.
im_proc = subprocess.Popen(['convert',imagepath,'-resize','600x800',
'-blur','2','-lat','8x8-2%',tempimagepath],
stdout=subprocess.PIPE)
def _replaces(self):
    """Return the set of variants with one position substituted.

    For every (head, tail) pair in ``self.slices`` except the last, the
    first item of ``tail`` is swapped for each symbol in ``ALPHABET``.
    Example of this kind of edit: "tge".
    """
    variants = set()
    for head, tail in self.slices[:-1]:
        for symbol in ALPHABET:
            variants.add(concat(head, symbol, tail[1:]))
    return variants
def _deletes(self):
    """Return the set of variants with one position removed.

    For every (head, tail) pair in ``self.slices`` except the last, the
    first item of ``tail`` is dropped. Example of this kind of edit: "th".
    """
    variants = set()
    for head, tail in self.slices[:-1]:
        variants.add(concat(head, tail[1:]))
    return variants
def _inserts(self):
    """Return the set of variants with one extra symbol inserted.

    For every (head, tail) pair in ``self.slices``, each symbol in
    ``ALPHABET`` is placed between ``head`` and ``tail``. Example of this
    kind of edit: "thwe".
    """
    variants = set()
    for head, tail in self.slices:
        for symbol in ALPHABET:
            variants.add(concat(head, symbol, tail))
    return variants
def _transposes(self):
    """Return the set of variants with two adjacent positions swapped.

    For every (head, tail) pair in ``self.slices`` except the last two, the
    first two items of ``tail`` are reversed and the rest of ``tail`` is
    kept. Example of this kind of edit: "teh".
    """
    variants = set()
    for head, tail in self.slices[:-2]:
        swapped_pair = reversed(tail[:2])
        variants.add(concat(head, swapped_pair, tail[2:]))
    return variants
def performSpellCorrection(featureObj):
    """Store a spell-corrected copy of the object's text and return the object.

    A ``SpellChecker`` for "en_US" is created over ``featureObj.getText()``.
    Every item the checker yields (presumably a word it flags as misspelled
    — confirm against the SpellChecker in use) is replaced with
    ``spell(item.word)``. The checker's resulting text is then passed to
    ``setSpellCorrection`` on the object's lexical features.
    """
    spell_checker = SpellChecker("en_US", featureObj.getText())
    for flagged in spell_checker:
        suggestion = spell(flagged.word)
        flagged.replace(suggestion)

    lexical_features = featureObj.getLexicalFeatures()
    corrected_text = spell_checker.get_text()
    lexical_features.setSpellCorrection(corrected_text)
    return featureObj