Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
error = None)
return self._results[path]
except google.api_core.exceptions.PermissionDenied as ex:
text = 'Authentication failure for Google service -- {}'.format(ex)
raise AuthenticationFailure(text)
except KeyboardInterrupt as ex:
raise
except Exception as ex:
if isinstance(ex, KeyError):
# Can happen if you control-C in the middle of the Google call.
# Result is "Exception ignored in: 'grpc._cython.cygrpc._next'"
# printed to the terminal and we end up here.
raise KeyboardInterrupt
else:
text = 'Error: failed to convert "{}": {}'.format(path, ex)
return TRResult(path = path, data = {}, boxes = [],
text = '', error = text)
full_text = ''
if 'recognitionResult' in analysis:
lines = analysis['recognitionResult']['lines']
sorted_lines = sorted(lines, key = lambda x: (x['boundingBox'][1], x['boundingBox'][0]))
full_text = '\n'.join(x['text'] for x in sorted_lines)
# Create our particular box structure for annotations. The Microsoft
# structure is like this: data['recognitionResult']['lines'] contains
# a list of dict with keys 'words', 'boundingBox', and 'text'.
boxes = []
for chunk in lines:
boxes.append(TextBox(boundingBox = chunk['boundingBox'], text = chunk['text']))
# Put it all together.
self._results[path] = TRResult(path = path, data = analysis,
text = full_text, boxes = boxes,
error = None)
return self._results[path]
else:
if __debug__: log('No operation-location in response headers')
raise ServiceFailure('Unexpected response from Microsoft server')
if __debug__: log('Polling MS for results ...')
analysis = {}
poll = True
while poll:
# I have never seen results returned within 1 second, and meanwhile
# the repeated polling counts against your rate limit. So, wait
# for 2 s to reduce the number of calls.
sleep(2)
response, error = net('get', polling_url, polling = True, headers = headers)
if isinstance(error, NetworkFailure):
if __debug__: log('Network exception: {}', str(error))
return TRResult(path = path, data = {}, text = '', error = str(error))
elif isinstance(error, RateLimitExceeded):
# Pause to let the server reset its timers. It seems that MS
# doesn't send back a Retry-After header when rate limited
# during polling, but I'm going to check it anyway, just in case.
sleep_time = 30
if 'Retry-After' in response.headers:
sleep_time = int(response.headers['Retry-After'])
if __debug__: log('Sleeping for {} s and retrying', sleep_time)
sleep(sleep_time)
elif error:
raise error
# Sometimes the response comes back without content. I don't know
# if that's a bug in the Azure system or not. It's not clear what
# else should be done except keep going.
if response.text:
for para in block['paragraphs']:
for word in para['words']:
text = ''
for symbol in word['symbols']:
text += symbol['text']
bb = word['boundingBox']['vertices']
corners = corner_list(bb)
if corners:
boxes.append(TextBox(boundingBox = corners,
text = text))
else:
# Something is wrong with the vertex list.
# Skip it and continue.
if __debug__: log('Bad bb for {}: {}', text, bb)
self._results[path] = TRResult(path = path, data = result,
boxes = boxes, text = full_text,
error = None)
return self._results[path]
except google.api_core.exceptions.PermissionDenied as ex:
text = 'Authentication failure for Google service -- {}'.format(ex)
raise AuthenticationFailure(text)
except KeyboardInterrupt as ex:
raise
except Exception as ex:
if isinstance(ex, KeyError):
# Can happen if you control-C in the middle of the Google call.
# Result is "Exception ignored in: 'grpc._cython.cygrpc._next'"
# printed to the terminal and we end up here.
raise KeyboardInterrupt
else:
text = 'Error: failed to convert "{}": {}'.format(path, ex)
def result(self, path):
'''Returns the results from calling the service on the 'path'. The
results are returned as a TRResult named tuple.
'''
# Check if we already processed it.
if path in self._results:
return self._results[path]
if __debug__: log('Reading {}', path)
image = open(path, 'rb').read()
if len(image) > self.max_size():
text = 'File exceeds {} byte limit for Google service'.format(self.max_size())
return TRResult(path = path, data = {}, text = '', error = text)
try:
if __debug__: log('Building Google vision API object')
client = gv.ImageAnnotatorClient()
image = gv.types.Image(content = image)
context = gv.types.ImageContext(language_hints = ['en-t-i0-handwrit'])
# Iterate over the known API calls and store each result.
result = dict.fromkeys(self._known_features)
for feature in self._known_features:
if __debug__: log('Sending image to Google for {} ...', feature)
response = getattr(client, feature)(image = image, image_context = context)
if __debug__: log('Received result.')
result[feature] = MessageToDict(response)
full_text = ''