Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Initially active_version is ephemeral - this is to handle the
# possibility that might fail to complete the setup transaction,
# i.e. the transition "setup" -> "joinable".
active_version = self.client.write(
key=self.get_path("/rdzv/active_version"),
value=json.dumps({"status": "setup"}),
prevExist=False,
ttl=CONST_ETCD_SETUP_TTL,
)
try:
version_counter = self.client.get(self.get_path("/rdzv/version_counter"))
version_counter.value = str(int(version_counter.value) + 1)
self.client.update(version_counter)
except (etcd.EtcdKeyNotFound, etcd.EtcdCompareFailed):
raise RendezvousNonRetryableError(
"Unexpected state of EtcdRendezvousHandler, worker needs to die."
)
# Any failure below results in declaring a retryable rendezvous failure.
# The ephemeral /rdzv/active_version will expire and someone can then
# re-try the setup process.
# Create directory node for participant data
self.client.write(
key=self.get_path("/rdzv/v_{}".format(version_counter.value)),
value=None,
dir=True,
prevExist=False,
)
# Publish rendezvous version and signal it is ready-to-be-joined.
except EtcdRendezvousRetryableFailure:
# In case of retryable failure, wait a small delay
# to avoid spamming etcd
time.sleep(1)
except RendezvousTimeoutException:
log.info("Rendezvous timeout occured in EtcdRendezvousHandler")
raise
except RendezvousClosedException:
log.info(
f"Rendezvous for run_id={self._run_id} was observed to be closed"
)
raise
except RendezvousNonRetryableError:
raise
except Exception as e:
# In case of a general exception, wait a small delay
# to avoid spamming etcd
# FIXME: there are a few things that fall under this like
# etcd.EtcdKeyNotFound, etc, which could be handled more explicitly.
log.info("Rendezvous attempt failed, will retry. Reason: " + str(e))
time.sleep(1)