Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _get(self, item, base_name, index):
    '''Resolve "item" to a local image file and report its format.

    If "item" is a URL, it is first checked to make sure it points to an
    image; the image is then downloaded to "<base_name>-<index>.<subtype>"
    in the output directory (or the current directory if no output
    directory was configured), and the URL itself is recorded in a sibling
    ".url" file.  Otherwise, "item" is treated as a file path relative to
    the current directory.

    Returns a tuple (file, format), where "file" is the absolute path of
    the local file and "format" is the content subtype (for URLs) or the
    file name extension without the leading dot (for files).  Returns
    (None, None) if the item has to be skipped: the URL cannot be opened,
    does not serve an image, cannot be downloaded, or the resulting file
    has zero length.
    '''
    # Shortcuts to make the code more readable.
    output_dir = self._output_dir

    # For URLs, we download the corresponding files and name them with
    # the base_name.
    if is_url(item):
        # First make sure the URL actually points to an image.
        if __debug__: log('testing if URL contains an image: {}', item)
        try:
            response = urllib.request.urlopen(item)
        except Exception as ex:
            warn('Skipping URL due to error: {}', ex)
            return (None, None)
        # Only the headers are needed here (the download happens separately
        # below), so close the connection as soon as they have been read
        # instead of leaving it open until garbage collection.
        with response:
            main_type = response.headers.get_content_maintype()
            orig_fmt = response.headers.get_content_subtype()
        if main_type != 'image':
            warn('Did not find an image at {}', item)
            return (None, None)
        base = '{}-{}'.format(base_name, index)
        # If we weren't given an output dir, then for URLs, we have no
        # choice but to use the current dir to download the file.
        # Important: don't change self._output_dir because if other
        # inputs *are* files, then those files will need other output dirs.
        if not output_dir:
            output_dir = os.getcwd()
        file = path.realpath(path.join(output_dir, base + '.' + orig_fmt))
        if not download_file(item, file):
            warn('Unable to download {}', item)
            return (None, None)
        # Keep a record of where the image came from next to the download.
        url_file = path.realpath(path.join(output_dir, base + '.url'))
        with open(url_file, 'w') as f:
            f.write(url_file_content(item))
            inform('Wrote URL to {}', styled(relative(url_file), 'white_on_gray'))
    else:
        file = path.realpath(path.join(os.getcwd(), item))
        orig_fmt = filename_extension(file)[1:]

    if not path.getsize(file) > 0:
        warn('File has zero length: {}', relative(file))
        return (None, None)
    if __debug__: log('{} has original format {}', relative(file), orig_fmt)
    return (file, orig_fmt)
inform('Sending to {} and waiting for response ...', service_name)
last_time = timer()
try:
output = service.result(image.file)
except AuthFailure as ex:
raise AuthFailure('Unable to use {}: {}', service, ex)
except RateLimitExceeded as ex:
time_passed = timer() - last_time
if time_passed < 1/service.max_rate():
warn('Pausing {} due to rate limits', service_name)
time.sleep(1/service.max_rate() - time_passed)
# FIXME resend after pause
if output.error:
alert('{} failed: {}', service_name, output.error)
warn('No result from {} for {}', service_name, relative(image.file))
return None
inform('Got result from {}.', service_name)
file_name = path.basename(image.file)
base_path = path.join(image.dest_dir, file_name)
annot_path = None
report_path = None
if self._make_grid:
annot_path = self._renamed(base_path, str(service), 'png')
inform('Creating annotated image for {}.', service_name)
with self._lock:
self._save(annotated_image(image.file, output.boxes, service), annot_path)
if self._extended_results:
txt_file = self._renamed(base_path, str(service), 'txt')
json_file = self._renamed(base_path, str(service), 'json')
inform('Saving all data for {}.', service_name)
def _save(self, result, file):
    '''Write "result" to the path "file".

    "result" may be a string (written as UTF-8 text), an io.BytesIO object
    (copied as binary data), or a tuple of the form (data, error), in which
    case "data" is written only if "error" is false.  If "result" is None,
    or the tuple carries an error, a warning is issued and nothing is
    written.

    Raises InternalError if the data to be written is of any other type.
    '''
    # First perform some sanity checks.
    if result is None:
        warn('No data for {}', file)
        return
    if isinstance(result, tuple):
        # Assumes 2 elements: data, and error
        (data, error) = result
        if error:
            alert('Error: {}', error)
            warn('Unable to write {}', file)
            return
        result = data

    if __debug__: log('writing output to file {}', relative(file))
    if isinstance(result, str):
        # Use an explicit encoding: the text may contain non-ASCII characters
        # and the platform default encoding is not always able to represent
        # them, which would make the write fail on some systems.
        with open(file, 'w', encoding = 'utf-8') as f:
            f.write(result)
    elif isinstance(result, io.BytesIO):
        with open(file, 'wb') as f:
            shutil.copyfileobj(result, f)
    else:
        # There's no other type in the code, so if we get here ...
        raise InternalError('Unexpected data in _save() -- please report this.')
inform('Starting on {}', styled(item, 'white'))
try:
(item_file, item_fmt) = self._get(item, base_name, index)
if not item_file:
return
dest_dir = self._output_dir if self._output_dir else path.dirname(item_file)
if not writable(dest_dir):
alert('Cannot write output in {}.', dest_dir)
return
# Normalize input image to the lowest common denominator.
image = self._normalized(item, item_fmt, item_file, dest_dir)
if not image.file:
warn('Skipping {}', relative(item_file))
return
# Send the file to the services and get Result tuples back.
if self._num_threads == 1:
# For 1 thread, avoid thread pool to make debugging easier.
results = [self._send(image, s) for s in services]
else:
with ThreadPoolExecutor(max_workers = self._num_threads) as tpe:
results = list(tpe.map(self._send, repeat(image), iter(services)))
# If a service failed for some reason (e.g., a network glitch), we
# get no result back. Remove empty results & go on with the rest.
results = [x for x in results if x is not None]
# Create grid file if requested.
if self._make_grid:
def _send(self, image, service):
    '''Send "image" to "service" and save the outputs that were requested.

    Depending on the configuration of this object, the following files are
    written: an annotated PNG image (if a grid is being made) and the JSON
    data plus extracted text (if extended results are wanted), both named
    after the image file in image.dest_dir; and a TSV comparison against a
    ".gt.txt" ground truth file (if comparison is enabled), named after
    image.item_file.

    Returns a Result(service, image, annot_path, report_path), in which the
    paths are None for outputs that were not requested, or None if no
    usable result was obtained from the service.

    Raises AuthFailure if the service rejects the credentials.
    '''
    service_name = styled(service.name(), service.name_color())
    inform('Sending to {} and waiting for response ...', service_name)

    # If the service reports that its rate limit was exceeded, pause for the
    # remainder of the minimum interval between calls and resend once.
    max_attempts = 2
    output = None
    for attempt in range(1, max_attempts + 1):
        last_time = timer()
        try:
            output = service.result(image.file)
            break
        except AuthFailure as ex:
            raise AuthFailure('Unable to use {}: {}', service, ex) from ex
        except RateLimitExceeded:
            if attempt == max_attempts:
                break
            time_passed = timer() - last_time
            min_interval = 1/service.max_rate()
            if time_passed < min_interval:
                warn('Pausing {} due to rate limits', service_name)
                time.sleep(min_interval - time_passed)
    if output is None:
        # Still rate-limited after resending; treat it like any other
        # failure to get a result rather than crashing further down.
        warn('No result from {} for {}', service_name, relative(image.file))
        return None
    if output.error:
        alert('{} failed: {}', service_name, output.error)
        warn('No result from {} for {}', service_name, relative(image.file))
        return None
    inform('Got result from {}.', service_name)

    file_name = path.basename(image.file)
    base_path = path.join(image.dest_dir, file_name)
    annot_path = None
    report_path = None
    if self._make_grid:
        annot_path = self._renamed(base_path, str(service), 'png')
        inform('Creating annotated image for {}.', service_name)
        with self._lock:
            self._save(annotated_image(image.file, output.boxes, service), annot_path)
    if self._extended_results:
        txt_file = self._renamed(base_path, str(service), 'txt')
        json_file = self._renamed(base_path, str(service), 'json')
        inform('Saving all data for {}.', service_name)
        self._save(json.dumps(output.data), json_file)
        inform('Saving extracted text for {}.', service_name)
        self._save(output.text, txt_file)
    if self._compare:
        gt_file = alt_extension(image.item_file, 'gt.txt')
        report_path = self._renamed(image.item_file, str(service), 'tsv')
        relaxed = (self._compare == 'relaxed')
        # Check readability first, so that a missing or unreadable ground
        # truth file is reported as unavailable (not as empty) and is never
        # handed to nonempty().
        if not readable(gt_file):
            warn('Skipping {} comparison because {} not available',
                 service_name, relative(gt_file))
        elif not nonempty(gt_file):
            warn('Skipping {} comparison because {} is empty',
                 service_name, relative(gt_file))
        else:
            if __debug__: log('reading ground truth from {}', gt_file)
            with open(gt_file, 'r') as f:
                gt_text = f.read()
            inform('Saving {} comparison to ground truth', service_name)
            self._save(text_comparison(output.text, gt_text, relaxed), report_path)
    return Result(service, image, annot_path, report_path)
def _save(self, result, file):
# First perform some sanity checks.
if result is None:
warn('No data for {}', file)
return
if isinstance(result, tuple):
# Assumes 2 elements: data, and error
(data, error) = result
if error:
alert('Error: {}', error)
warn('Unable to write {}', file)
return
else:
result = data
if __debug__: log('writing output to file {}', relative(file))
if isinstance(result, str):
with open(file, 'w') as f:
f.write(result)
elif isinstance(result, io.BytesIO):