def clean_logs(self):
    """Remove all Hive logs."""

    logger.info("Cleaning logs")

    restart = False
    if self.running:
        logger.warn("The cluster needs to be stopped before cleaning.")
        self.stop()
        restart = True

    action = Remote("rm -rf " + self.logs_dir + "/* ",
                    self.hosts)
    action.run()

    if restart:
        self.start()
def clean_logs(self):
    """Remove all Spark logs."""

    logger.info("Cleaning logs")

    restart = False
    if self.running:
        logger.warn("The cluster needs to be stopped before cleaning.")
        self.stop()
        restart = True

    action = Remote("rm -rf " + self.logs_dir + "/* " +
                    self.work_dir + "/*",
                    self.hosts)
    action.run()

    if restart:
        self.start()
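# Both clean_logs variants above use execo's Remote action to run the same
# shell command over SSH on every host of the cluster. A minimal standalone
# sketch of that pattern (the host names and the path are placeholders, not
# taken from the original code):
from execo import Remote

hosts = ["node-1.example.org", "node-2.example.org"]
cleanup = Remote("rm -rf /tmp/myapp/logs/*", hosts)
cleanup.run()                # blocks until the command finishes on every host
if not cleanup.ok:           # ok is False if the command failed anywhere
    print("log cleanup failed on at least one host")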
def make_reservation(job_name=JOB_NAME, job_type='allow_classic_ssh'):
    plan = ex5.planning
    end = ex.time_utils.format_date(time.time() + 12600)
    logging.basicConfig(level=logging.DEBUG)
    oargrid_job_id, _ = ex5.planning.get_job_by_name(job_name)
    if oargrid_job_id is None:
        logging.info("Starting a new job")
        planning = plan.get_planning(endtime=end)
        slots = plan.compute_slots(planning, walltime=WALLTIME,
                                   excluded_elements=excluded)
        startdate, enddate, resources = plan.find_free_slot(slots,
                                                            {'grid5000': 1})
        logging.info("startdate = %s, enddate = %s resources = %s"
                     % (startdate, enddate, resources))
        resources = plan.distribute_hosts(resources, {'grid5000': 1},
                                          excluded_elements=excluded)
        # shuffle to balance the load across nodes
        random.shuffle(resources)
        specs = plan.get_jobs_specs(resources, excluded_elements=excluded)
        spec, frontend = specs[0]
        spec.name = job_name
        logging.info("specs = %s" % spec)
def default(self, line):
    global interrupted, workers, cores
    interrupted = False
    print 'interrupting previous command'
    workers.kill()
    execo.sleep(1)
    print 'sending command: ' + line
    workers = execo.Remote(
        line,
        cores).start()
app = App()
if jobid:
    try:
        print 'Waiting for job to start'
        execo_g5k.wait_oar_job_start(jobid, site)
        print 'Retrieving nodes'
        nodes = execo_g5k.get_oar_job_nodes(jobid, site)
        # Setup nodes
        print 'Preparing workers with cmd: ' + setup_cmd
        workers = execo.Remote(
            setup_cmd,
            nodes).start()
        workers.expect('Worker Setup Completed')
        workers.kill()
        # Possibly open more than one connection per machine
        cores = nodes * args.nb_cores
        print cores
        print 'Example cmd: %s' % (workers_cmd)
        app.prompt = '%s (%d node(s), %d core(s)/node)> ' % (
            site, args.volunteers, args.nb_cores)
        app.cmdloop()
        # execo.sleep(600)
        # print 'Workers done'
    finally:
        execo_g5k.oardel([(jobid, site)])
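# app above is an instance of a small interactive shell whose class is not
# included in the snippets. A minimal sketch, assuming it is built on Python's
# standard cmd module (which provides the prompt attribute, cmdloop() and the
# default() hook shown earlier); the class body below is illustrative only:
import cmd

class App(cmd.Cmd):
    """Forward every typed line to the worker nodes via default()."""

    def do_quit(self, line):
        """Exit the cmdloop() started once the job is ready."""
        return True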
proc = SshProcess("jps", self.master)
proc.run()
ids_to_kill = []
for line in proc.stdout.splitlines():
field = line.split()
if field[1] in hive_processes:
ids_to_kill.append(field[0])
if ids_to_kill:
force_kill = True
ids_to_kill_str = ""
for pid in ids_to_kill:
ids_to_kill_str += " " + pid
proc = SshProcess("kill -9" + ids_to_kill_str, h)
proc.run()
if force_kill:
logger.info(
"Processes from previous hadoop deployments had to be killed")
self.clean_logs()
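# For reference: jps prints one line per JVM as "<pid> <main class name>",
# which is why field[0] is used as the pid and field[1] as the process name.
# Illustrative parse over made-up output (pids and class names invented):
sample_jps_output = "4242 RunJar\n4243 Jps"
hive_like = ("RunJar",)   # stand-in for whatever hive_processes contains
pids = [l.split()[0] for l in sample_jps_output.splitlines()
        if l.split()[1] in hive_like]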
def __force_clean(self):
    """Stop previous Spark processes (if any) and remove all remote files
    created by it."""

    spark_processes = [
        "Master",
        "Worker"
    ]

    force_kill = False
    for h in self.hosts:
        proc = SshProcess("jps", h)
        proc.run()

        ids_to_kill = []
        for line in proc.stdout.splitlines():
            field = line.split()
            if field[1] in spark_processes:
                ids_to_kill.append(field[0])

        if ids_to_kill:
            force_kill = True
            ids_to_kill_str = ""
            for pid in ids_to_kill:
                ids_to_kill_str += " " + pid

            logger.warn(
                "Killing running Spark processes in host %s" %
                style.host(h.address.split('.')[0]))

            proc = SshProcess("kill -9" + ids_to_kill_str, h)
            proc.run()

    if force_kill:
        logger.info(
            "Processes from previous hadoop deployments had to be killed")
        self.clean_logs()
def start_dfs_and_wait(self):
    """Start the NameNode and DataNodes and wait for exiting safemode."""

    self._check_initialization()

    self.start_dfs()

    logger.info("Waiting for safe mode to be off")
    proc = SshProcess(self.bin_dir + "/hadoop dfsadmin -safemode wait",
                      self.master)
    proc.run()

    if not proc.finished_ok:
        logger.warn("Error while starting HDFS")
    else:
        self.running_dfs = True
        if self.running_map_reduce:
            self.running = True
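# start_dfs_and_wait belongs to a hadoop_g5k-style cluster object. A hedged
# sketch of the lifecycle around it (the import path, bootstrap/initialize
# method names and the tarball path are assumptions, not taken from the
# snippets above):
from hadoop_g5k import HadoopCluster

cluster = HadoopCluster(nodes)               # nodes: list of execo Hosts
cluster.bootstrap("hadoop-1.2.1.tar.gz")     # assumed local tarball path
cluster.initialize()                         # assumed: configure and format HDFS
cluster.start_dfs_and_wait()                 # block until safemode is off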