Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# If no URL is found, we consider this a local only repo (but still require that it is a git repo)
self.url = remote_url
self.remote = self.url is not None and self.url.strip() != ""
self.repo_dir = os.path.join(root_dir, repo_name)
self.resource_name = resource_name
self.offline = offline
self.logger = logging.getLogger(__name__)
self.revision = None
if self.remote and not self.offline and fetch:
# a normal git repo with a remote
if not git.is_working_copy(self.repo_dir):
git.clone(src=self.repo_dir, remote=self.url)
else:
try:
git.fetch(src=self.repo_dir)
except exceptions.SupplyError:
console.warn("Could not update %s. Continuing with your locally available state." % self.resource_name)
else:
if not git.is_working_copy(self.repo_dir):
if io.exists(self.repo_dir):
raise exceptions.SystemSetupError("[{src}] must be a git repository.\n\nPlease run:\ngit -C {src} init"
.format(src=self.repo_dir))
else:
raise exceptions.SystemSetupError("Expected a git repository at [{src}] but the directory does not exist."
.format(src=self.repo_dir))
"store running on host [%s] at port [%s]. Please adjust your x-pack configuration or specify a user with enough " \
"privileges in the configuration in [%s]." % \
(target.__name__, node["host"], node["port"], config.ConfigFile().location)
self.logger.exception(msg)
raise exceptions.SystemSetupError(msg)
except elasticsearch.exceptions.ConnectionTimeout:
if execution_count < max_execution_count:
self.logger.debug("Connection timeout in attempt [%d/%d].", execution_count, max_execution_count)
time.sleep(time_to_sleep)
else:
operation = target.__name__
self.logger.exception("Connection timeout while running [%s] (retried %d times).", operation, max_execution_count)
node = self._client.transport.hosts[0]
msg = "A connection timeout occurred while running the operation [%s] against your Elasticsearch metrics store on " \
"host [%s] at port [%s]." % (operation, node["host"], node["port"])
raise exceptions.RallyError(msg)
except elasticsearch.exceptions.ConnectionError:
node = self._client.transport.hosts[0]
msg = "Could not connect to your Elasticsearch metrics store. Please check that it is running on host [%s] at port [%s]" \
" or fix the configuration in [%s]." % (node["host"], node["port"], config.ConfigFile().location)
self.logger.exception(msg)
raise exceptions.SystemSetupError(msg)
except elasticsearch.TransportError as e:
if e.status_code in (502, 503, 504, 429) and execution_count < max_execution_count:
self.logger.debug("%s (code: %d) in attempt [%d/%d]. Sleeping for [%f] seconds.",
responses[e.status_code], e.status_code, execution_count, max_execution_count, time_to_sleep)
time.sleep(time_to_sleep)
else:
node = self._client.transport.hosts[0]
msg = "A transport error occurred while running the operation [%s] against your Elasticsearch metrics store on " \
"host [%s] at port [%s]." % (target.__name__, node["host"], node["port"])
self.logger.exception(msg)
elif root_path != p:
raise exceptions.SystemSetupError("Invalid car: {}. Multiple bootstrap hooks are forbidden.".format(name))
all_config_base_vars.update(descriptor.config_base_variables)
all_car_vars.update(descriptor.variables)
# env needs to be merged individually, consider ES_JAVA_OPTS="-Xms1G" and ES_JAVA_OPTS="-ea".
# We want it to be ES_JAVA_OPTS="-Xms1G -ea" in the end.
for k, v in descriptor.env.items():
# merge
if k not in all_env:
all_env[k] = v
else: # merge
# assume we need to separate with a space
all_env[k] = all_env[k] + " " + v
if len(all_config_paths) == 0:
raise exceptions.SystemSetupError("At least one config base is required for car {}".format(name))
variables = {}
# car variables *always* take precedence over config base variables
variables.update(all_config_base_vars)
variables.update(all_car_vars)
return Car(name, root_path, all_config_paths, variables, all_env)
"""
# ES returns all stats values in bytes or ms via "human: false"
import elasticsearch
try:
ccr_stats_api_endpoint = "/_ccr/stats"
filter_path = "follow_stats"
stats = self.client.transport.perform_request("GET", ccr_stats_api_endpoint, params={"human": "false",
"filter_path": filter_path})
except elasticsearch.TransportError as e:
msg = "A transport error occurred while collecting CCR stats from the endpoint [{}?filter_path={}] on " \
"cluster [{}]".format(ccr_stats_api_endpoint, filter_path, self.cluster_name)
self.logger.exception(msg)
raise exceptions.RallyError(msg)
if filter_path in stats and "indices" in stats[filter_path]:
for indices in stats[filter_path]["indices"]:
try:
if self.indices and indices["index"] not in self.indices:
# Skip metrics for indices not part of user supplied whitelist (ccr-stats-indices) in telemetry params.
continue
self.record_stats_per_index(indices["index"], indices["shards"])
except KeyError:
self.logger.warning(
"The 'indices' key in %s does not contain an 'index' or 'shards' key "
"Maybe the output format of the %s endpoint has changed. Skipping.", ccr_stats_api_endpoint, ccr_stats_api_endpoint
)
"privileges in the configuration in [%s]." % \
(target.__name__, node["host"], node["port"], config.ConfigFile().location)
logger.exception(msg)
raise exceptions.SystemSetupError(msg)
except elasticsearch.exceptions.ConnectionTimeout:
if execution_count < max_execution_count:
logger.info("Received a connection timeout from the metrics store in attempt [%d/%d]." %
(execution_count, max_execution_count))
time.sleep(1)
else:
operation = target.__name__
logger.exception("Got a connection timeout while running [%s] (retried %d times)." % (operation, max_execution_count))
node = self._client.transport.hosts[0]
msg = "A connection timeout occurred while running the operation [%s] against your Elasticsearch metrics store on " \
"host [%s] at port [%s]." % (operation, node["host"], node["port"])
raise exceptions.RallyError(msg)
except elasticsearch.exceptions.ConnectionError:
node = self._client.transport.hosts[0]
msg = "Could not connect to your Elasticsearch metrics store. Please check that it is running on host [%s] at port [%s]" \
" or fix the configuration in [%s]." % (node["host"], node["port"], config.ConfigFile().location)
logger.exception(msg)
raise exceptions.SystemSetupError(msg)
except elasticsearch.TransportError as e:
# gateway timeout - let's wait a bit and retry
if e.status_code == 504 and execution_count < max_execution_count:
logger.info("Received a gateway timeout from the metrics store in attempt [%d/%d]." %
(execution_count, max_execution_count))
time.sleep(1)
else:
node = self._client.transport.hosts[0]
msg = "A transport error occurred while running the operation [%s] against your Elasticsearch metrics store on " \
"host [%s] at port [%s]." % (target.__name__, node["host"], node["port"])
def union(self, other):
    """
    Merge this corpus with ``other`` and return the combined corpus.

    Both corpora must share the same name; merging the corpus with itself
    returns it unchanged.

    :param other: The document corpus to merge with this one.
    :return: A ``DocumentCorpus`` containing the documents of both corpora.
    :raise exceptions.RallyAssertionError: If the corpus names differ.
    """
    if self.name != other.name:
        raise exceptions.RallyAssertionError("Both document corpora must have the same name")
    # trivial case: nothing to merge
    if self is other:
        return self
    merged_docs = set(self.documents)
    merged_docs = merged_docs.union(other.documents)
    return DocumentCorpus(self.name, list(merged_docs))
return
if not offline:
logger.info("Downloading from [%s] to [%s]." % (url, local_path))
try:
io.ensure_dir(os.path.dirname(local_path))
if size_in_bytes:
size_in_mb = round(convert.bytes_to_mb(size_in_bytes))
# ensure output appears immediately
print("Downloading data from %s (%s MB) ... " % (url, size_in_mb), end='', flush=True)
if url.startswith("http"):
net.download(url, local_path, size_in_bytes)
elif url.startswith("s3"):
self._do_download_via_s3(url, local_path, size_in_bytes)
else:
raise exceptions.SystemSetupError("Cannot download benchmark data from [%s]. Only http(s) and s3 are supported." % url)
if size_in_bytes:
print("Done")
except urllib.error.URLError:
logger.exception("Could not download [%s] to [%s]." % (url, local_path))
if raise_url_error:
raise
# file must exist at this point -> verify
if not os.path.isfile(local_path):
if offline:
raise exceptions.SystemSetupError("Cannot find %s. Please disable offline mode and retry again." % local_path)
else:
raise exceptions.SystemSetupError("Could not download from %s to %s. Please verify that data are available at %s and "
"check your internet connection." % (url, local_path, url))
tmp_data_set_path = local_path + ".tmp"
try:
if url.startswith("s3"):
expected_size_in_bytes = download_s3(url, tmp_data_set_path, expected_size_in_bytes, progress_indicator)
else:
expected_size_in_bytes = download_http(url, tmp_data_set_path, expected_size_in_bytes, progress_indicator)
except BaseException:
if os.path.isfile(tmp_data_set_path):
os.remove(tmp_data_set_path)
raise
else:
download_size = os.path.getsize(tmp_data_set_path)
if expected_size_in_bytes is not None and download_size != expected_size_in_bytes:
if os.path.isfile(tmp_data_set_path):
os.remove(tmp_data_set_path)
raise exceptions.DataError("Download of [%s] is corrupt. Downloaded [%d] bytes but [%d] bytes are expected. Please retry." %
(local_path, download_size, expected_size_in_bytes))
os.rename(tmp_data_set_path, local_path)
def wait_for_rest_layer(es, max_attempts=20):
    """
    Waits until the REST layer of the given Elasticsearch cluster responds,
    polling once per second.

    :param es: Elasticsearch client to use for the availability check.
    :param max_attempts: Maximum number of attempts before giving up (default 20).
    :return: True as soon as the cluster responds to an info call, False if it
             did not respond within ``max_attempts`` attempts.
    :raise exceptions.SystemSetupError: If the endpoint rejects the connection
           because plain http was used against an https endpoint.
    """
    # Keep the import local (as the original did) but hoist it out of the
    # retry loop - it is loop-invariant.
    import elasticsearch
    for _ in range(max_attempts):
        try:
            es.info()
            return True
        except elasticsearch.ConnectionError as e:
            if "SSL: UNKNOWN_PROTOCOL" in str(e):
                raise exceptions.SystemSetupError("Could not connect to cluster via https. Is this an https endpoint?", e)
            else:
                time.sleep(1)
        except elasticsearch.TransportError as e:
            # 503 (service unavailable) and 401 (unauthorized, e.g. while
            # security is still initializing) are expected during startup;
            # wait and retry. Anything else is a genuine error.
            if e.status_code in (503, 401):
                time.sleep(1)
            else:
                raise e
    return False