Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
import pickle
import logging
import sys
logger = logging.getLogger('indra.tools.reading.pmid_reading.read_pmids_aws')

# Setting default force read/fulltext parameters
force_read = args.force_read
force_fulltext = args.force_fulltext

# S3 location of the PMID list for this reading run.
client = boto3.client('s3')
bucket_name = 'bigmech'
pmid_list_key = 'reading_results/%s/pmids' % args.basename

# When REACH is among the requested readers, both its path and its version
# must be configured; otherwise stop before doing any work.
if 'reach' in [rdr.lower() for rdr in args.readers]:
    path_to_reach = get_config('REACHPATH')
    reach_version = get_config('REACH_VERSION')
    if path_to_reach is None or reach_version is None:
        print('REACHPATH and/or REACH_VERSION not defined, exiting.')
        sys.exit(1)

try:
    pmid_list_obj = client.get_object(
        Bucket=bucket_name,
        Key=pmid_list_key
    )
except botocore.exceptions.ClientError as e:
    # Handle a missing object gracefully
    if e.response['Error']['Code'] == 'NoSuchKey':
        logger.info('Could not find PMID list file at %s, exiting' %
                    pmid_list_key)
        sys.exit(1)
    # Any other client error must not be swallowed: continuing would leave
    # pmid_list_obj unbound and hide the real cause of the failure.
    raise
def check_api_keys(*args, **kwargs):
    """Make sure the Elsevier API key is available, then call ``func``.

    While the module-level ``ELSEVIER_KEYS`` is still None, it is replaced by
    a dict filled from ``get_config``. The institution token is optional (a
    warning is logged when ``has_config`` reports it missing), whereas the
    API key is required.

    Returns
    -------
    ``failure_ret`` if the API key is unavailable, otherwise the result of
    ``func(*args, **kwargs)``.
    """
    global ELSEVIER_KEYS

    if ELSEVIER_KEYS is not None:
        # Keys were already looked up by an earlier call; only verify that
        # the required one was actually stored.
        if 'X-ELS-APIKey' not in ELSEVIER_KEYS:
            logger.error('No Elsevier API key %s found: cannot %s'
                         % (API_KEY_ENV_NAME, task_desc))
            return failure_ret
        return func(*args, **kwargs)

    # First call: populate the cache.
    ELSEVIER_KEYS = {}
    # Try to read in Elsevier API keys. For each key, first check
    # the environment variables, then check the INDRA config file.
    inst_key_missing = not has_config(INST_KEY_ENV_NAME)
    if inst_key_missing:
        logger.warning('Institution API key %s not found in config '
                       'file or environment variable: this will '
                       'limit access for %s'
                       % (INST_KEY_ENV_NAME, task_desc))
    ELSEVIER_KEYS['X-ELS-Insttoken'] = get_config(INST_KEY_ENV_NAME)

    api_key_missing = not has_config(API_KEY_ENV_NAME)
    if api_key_missing:
        logger.error('API key %s not found in configuration file '
                     'or environment variable: cannot %s'
                     % (API_KEY_ENV_NAME, task_desc))
        return failure_ret
    ELSEVIER_KEYS['X-ELS-APIKey'] = get_config(API_KEY_ENV_NAME)

    return func(*args, **kwargs)
return check_api_keys
def get_default_ndex_cred(ndex_cred):
    """Return an NDEx ``(username, password)`` pair.

    Parameters
    ----------
    ndex_cred : dict or None
        Optional credentials looked up under the keys ``'user'`` and
        ``'password'``.

    Returns
    -------
    tuple
        The pair taken from ``ndex_cred`` when both entries are present and
        not None; otherwise the values of ``get_config('NDEX_USERNAME')``
        and ``get_config('NDEX_PASSWORD')``.
    """
    creds = None
    if ndex_cred:
        creds = (ndex_cred.get('user'), ndex_cred.get('password'))

    # Fall back to the configuration unless both values were supplied.
    if creds is None or creds[0] is None or creds[1] is None:
        creds = (get_config('NDEX_USERNAME'), get_config('NDEX_PASSWORD'))
    return creds
def get_url_base(end_point):
    """Join the configured INDRA DB REST base URL with ``end_point``.

    The base URL is read with ``get_config('INDRA_DB_REST_URL',
    failure_ok=False)``. Trailing slashes on the base and leading slashes on
    ``end_point`` are stripped so the two parts are separated by exactly one
    ``/``.
    """
    base = get_config('INDRA_DB_REST_URL', failure_ok=False)
    return '/'.join((base.rstrip('/'), end_point.lstrip('/')))
def get_default_ndex_cred(ndex_cred):
    """Resolve the NDEx username and password to use.

    Parameters
    ----------
    ndex_cred : dict or None
        Optional mapping with ``'user'`` and ``'password'`` entries.

    Returns
    -------
    tuple
        ``(username, password)``. Values from ``ndex_cred`` are used only if
        both are present and not None; in every other case both come from
        ``get_config`` (``'NDEX_USERNAME'`` and ``'NDEX_PASSWORD'``).
    """
    if ndex_cred:
        user = ndex_cred.get('user')
        secret = ndex_cred.get('password')
        have_both = user is not None and secret is not None
        if have_both:
            return user, secret

    # Nothing usable was passed in: defer to the configuration.
    user = get_config('NDEX_USERNAME')
    secret = get_config('NDEX_PASSWORD')
    return user, secret
def _make_request(meth, end_point, query_str, data=None, params=None, tries=2):
    """Prepare and log a request to the INDRA DB REST service.

    Parameters
    ----------
    meth : str
        Request method name; in this block it only appears in the error log.
    end_point : str
        Path appended to the configured ``INDRA_DB_REST_URL``.
    query_str : str
        Text appended verbatim to the URL path.
    data : object, optional
        If truthy, serialized with ``json.dumps`` and a JSON content-type
        header is set.
    params : dict, optional
        Query parameters. NOTE: the ``'api_key'`` entry is written into this
        dict in place, so a dict supplied by the caller is modified.
    tries : int, optional
        Only appears in the error log within this block.

    Raises
    ------
    ValueError
        If ``end_point`` is None.
    """
    if params is None:
        params = {}

    if end_point is None:
        logger.error("Exception in submit request with args: %s"
                     % str([meth, end_point, query_str, data, params, tries]))
        raise ValueError("end_point cannot be None.")

    url = get_config('INDRA_DB_REST_URL', failure_ok=False)
    api_key = get_config('INDRA_DB_REST_API_KEY', failure_ok=True)
    url_path = url.rstrip('/') + '/' + end_point.lstrip('/')
    url_path += query_str

    headers = {}
    if data:
        # This is an assumption which applies to our use cases for now, but may
        # not generalize.
        headers['content-type'] = 'application/json'
        json_data = json.dumps(data)
    else:
        json_data = None
    params['api_key'] = api_key

    def _redact(text):
        """Hide the API key in ``text``, leaving it intact if no key is set."""
        # Without this guard a missing key would turn every literal 'None'
        # into '[api-key]', and an empty key would insert the marker between
        # every character (str.replace with an empty needle).
        if not api_key:
            return text
        return text.replace(str(api_key), '[api-key]')

    logger.info('url and query string: %s', _redact(url_path))
    logger.info('headers: %s', _redact(str(headers)))
    logger.info('data: %s', _redact(str(data)))
def post(self):
    """Process text with REACH and return INDRA Statements.

    Reads ``text``, ``offline`` and ``url`` from the JSON body of the
    request, calls ``reach.process_text`` and returns the output of
    ``_stmts_from_proc`` for the resulting processor.
    """
    args = request.json
    text = args.get('text')
    offline = bool(args.get('offline'))
    given_url = args.get('url')
    config_url = get_config('REACH_TEXT_URL', failure_ok=True)

    # URL precedence:
    #   1. a 'url' entry in the request (even an explicit None is honored),
    #   2. the URL set in the configuration,
    #   3. the default REACH web service URL, unless offline was requested.
    if 'url' in args:
        url = given_url
    elif config_url:
        url = config_url
    else:
        url = None if offline else reach_text_url

    # If a URL is set, prioritize it over the offline setting
    offline = offline and not url

    rp = reach.process_text(text, offline=offline, url=url)
    return _stmts_from_proc(rp)
def make_db_rest_request(meth, end_point, query_str, data=None, params=None,
                         tries=2, timeout=None):
    """Prepare and log a request to the INDRA DB REST service.

    Parameters
    ----------
    meth : str
        Name of the ``requests`` function to use, case-insensitive
        (looked up with ``getattr(requests, meth.lower())``).
    end_point : str
        Path passed to ``get_url_base`` to build the URL.
    query_str : str
        Text appended verbatim to the URL path.
    data : object, optional
        If truthy, serialized with ``json.dumps`` and a JSON content-type
        header is set.
    params : dict, optional
        Query parameters. NOTE: the ``'api_key'`` entry is written into this
        dict in place, so a dict supplied by the caller is modified.
    tries : int, optional
        Only appears in the error log within this block.
    timeout : optional
        Not referenced within this block.

    Raises
    ------
    ValueError
        If ``end_point`` is None.
    """
    if params is None:
        params = {}

    if end_point is None:
        logger.error("Exception in submit request with args: %s"
                     % str([meth, end_point, query_str, data, params, tries]))
        raise ValueError("end_point cannot be None.")

    url_path = get_url_base(end_point)
    api_key = get_config('INDRA_DB_REST_API_KEY', failure_ok=True)
    url_path += query_str

    headers = {}
    if data:
        # This is an assumption which applies to our use cases for now, but may
        # not generalize.
        headers['content-type'] = 'application/json'
        json_data = json.dumps(data)
    else:
        json_data = None
    params['api_key'] = api_key

    def _redact(text):
        """Hide the API key in ``text``, leaving it intact if no key is set."""
        # Without this guard a missing key would turn every literal 'None'
        # into '[api-key]', and an empty key would insert the marker between
        # every character (str.replace with an empty needle).
        if not api_key:
            return text
        return text.replace(str(api_key), '[api-key]')

    logger.info('query: %s', _redact(url_path))
    logger.info('params: %s', _redact(str(params)))
    logger.debug('headers: %s', _redact(str(headers)))
    logger.debug('data: %s', _redact(str(data)))
    method_func = getattr(requests, meth.lower())
ELSEVIER_KEYS = {}
# Try to read in Elsevier API keys. For each key, first check
# the environment variables, then check the INDRA config file.
if not has_config(INST_KEY_ENV_NAME):
logger.warning('Institution API key %s not found in config '
'file or environment variable: this will '
'limit access for %s'
% (INST_KEY_ENV_NAME, task_desc))
ELSEVIER_KEYS['X-ELS-Insttoken'] = get_config(INST_KEY_ENV_NAME)
if not has_config(API_KEY_ENV_NAME):
logger.error('API key %s not found in configuration file '
'or environment variable: cannot %s'
% (API_KEY_ENV_NAME, task_desc))
return failure_ret
ELSEVIER_KEYS['X-ELS-APIKey'] = get_config(API_KEY_ENV_NAME)
elif 'X-ELS-APIKey' not in ELSEVIER_KEYS.keys():
logger.error('No Elsevier API key %s found: cannot %s'
% (API_KEY_ENV_NAME, task_desc))
return failure_ret
return func(*args, **kwargs)
return check_api_keys
def _check_reach_env():
"""Check that the environment supports running reach."""
# Get the path to the REACH JAR
path_to_reach = get_config('REACHPATH')
if path_to_reach is None:
path_to_reach = environ.get('REACHPATH', None)
if path_to_reach is None or not path.exists(path_to_reach):
raise ReachError(
'Reach path unset or invalid. Check REACHPATH environment var '
'and/or config file.'
)
logger.debug('Using REACH jar at: %s' % path_to_reach)
# Get the reach version.
reach_version = get_config('REACH_VERSION')
if reach_version is None:
reach_version = environ.get('REACH_VERSION', None)
if reach_version is None:
logger.debug('REACH version not set in REACH_VERSION')