def delete_experiments(db_collection_name, sacred_id, filter_states, batch_id, filter_dict):
    collection = get_collection(db_collection_name)
    if sacred_id is None:
        # Guard against filter_states being None before intersecting with the live states.
        if filter_states is not None and len({'PENDING', 'RUNNING', 'KILLED'} & set(filter_states)) > 0:
            detect_killed(db_collection_name, print_detected=False)
        filter_dict = build_filter_dict(filter_states, batch_id, filter_dict)
        ndelete = collection.count_documents(filter_dict)
        batch_ids = collection.find(filter_dict, {'batch_id'})
        batch_ids_in_del = set([x['batch_id'] for x in batch_ids])

        if ndelete >= 10:
            if input(f"Deleting {ndelete} configuration{s_if(ndelete)} from database collection. "
                     f"Are you sure? (y/n) ").lower() != "y":
                exit()
        else:
            logging.info(f"Deleting {ndelete} configuration{s_if(ndelete)} from database collection.")
        collection.delete_many(filter_dict)
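
# Usage sketch (hypothetical collection name and batch ID; assumes a running
# MongoDB instance configured for seml): delete all FAILED configurations of
# batch 3, prompting for confirmation if ten or more would be removed.
delete_experiments('my_experiments', sacred_id=None,
                   filter_states=['FAILED'], batch_id=3, filter_dict=None)
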
    filter_states: list of strings or None
        List of statuses to filter for. Will cancel all jobs from the database collection
        with one of the given statuses.
    batch_id: int or None
        The ID of the batch of experiments to cancel. All experiments that are queued together (i.e. within the same
        command line call) have the same batch ID.
    filter_dict: dict or None
        Arbitrary filter dictionary to use for cancelling experiments. Any experiments whose database entries match all
        keys/values of the dictionary will be cancelled.

    Returns
    -------
    None
    """
    collection = get_collection(db_collection_name)
    if sacred_id is None:
        # If no ID is provided, we check whether there are Slurm jobs for which, after this
        # action, no RUNNING experiment remains. These Slurm jobs can be killed altogether.
        # However, it is currently NOT possible to cancel a single experiment in a Slurm job
        # with multiple running experiments.
        try:
            if filter_states is not None and len({'PENDING', 'RUNNING', 'KILLED'} & set(filter_states)) > 0:
                detect_killed(db_collection_name, print_detected=False)
            filter_dict = build_filter_dict(filter_states, batch_id, filter_dict)
            ncancel = collection.count_documents(filter_dict)

            if ncancel >= 10:
                if input(f"Cancelling {ncancel} experiment{s_if(ncancel)}. "
                         f"Are you sure? (y/n) ").lower() != "y":
                    exit()
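
# Usage sketch: the fragment above appears to come from seml's cancel function
# (the name 'cancel_experiments' is an assumption, inferred from the docstring
# and body). Cancel all PENDING experiments of batch 2 in a hypothetical
# collection:
cancel_experiments('my_experiments', sacred_id=None,
                   filter_states=['PENDING'], batch_id=2, filter_dict=None)
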
def detect_killed(db_collection_name, print_detected=True):
    collection = get_collection(db_collection_name)
    # Find all experiments that claim to be pending/running and have a Slurm job attached.
    exps = collection.find({'status': {'$in': ['PENDING', 'RUNNING']},
                            '$or': [{'slurm.array_id': {'$exists': True}}, {'slurm.id': {'$exists': True}}]})
    running_jobs = get_slurm_arrays_tasks()
    old_running_jobs = get_slurm_jobs()  # Backwards compatibility
    nkilled = 0
    for exp in exps:
        # An array experiment is still alive if its task ID falls in one of the pending
        # ranges or is in the set of currently running task IDs of its Slurm array.
        exp_running = ('array_id' in exp['slurm'] and exp['slurm']['array_id'] in running_jobs
                       and (any(exp['slurm']['task_id'] in r for r in running_jobs[exp['slurm']['array_id']][0])
                            or exp['slurm']['task_id'] in running_jobs[exp['slurm']['array_id']][1]))
        exp_running |= ('id' in exp['slurm'] and exp['slurm']['id'] in old_running_jobs)
        if not exp_running:
            # Experiments that stopped gracefully have a stop_time; the rest were killed.
            if 'stop_time' in exp:
                collection.update_one({'_id': exp['_id']}, {'$set': {'status': 'INTERRUPTED'}})
            else:
                nkilled += 1
                collection.update_one({'_id': exp['_id']}, {'$set': {'status': 'KILLED'}})
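
# The membership tests above assume get_slurm_arrays_tasks() maps each Slurm
# array ID to a pair (pending_task_ranges, running_task_ids). A hypothetical
# example of such a return value (the structure is inferred from the indexing
# above, not taken from seml's documentation):
#
#     {'1234567': ([range(0, 3), range(7, 9)],  # pending task IDs, as ranges
#                  {3, 4, 5})}                   # currently running task IDs
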
def get_results(db_collection_name, fields=['config', 'result'],
                to_data_frame=False, mongodb_config=None, suffix=None,
                states=['COMPLETED'], filter_dict={}, parallel=False):
    import pandas as pd

    collection = get_collection(db_collection_name, mongodb_config=mongodb_config, suffix=suffix)

    if len(states) > 0:
        if 'status' in filter_dict:
            logging.warning("'states' argument is not empty and will overwrite 'filter_dict['status']'.")
        filter_dict['status'] = {'$in': states}

    cursor = collection.find(filter_dict, fields)
    results = [x for x in tqdm(cursor, total=collection.count_documents(filter_dict))]

    if parallel:
        from multiprocessing import Pool
        with Pool() as p:
            parsed = list(tqdm(p.imap(parse_jsonpickle, results),
                               total=len(results)))
    else:
        parsed = [parse_jsonpickle(entry) for entry in tqdm(results)]
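
# Usage sketch (hypothetical collection name; assumes COMPLETED experiments with
# a 'result' field exist in the database): fetch configs and results of all
# completed runs.
results = get_results('my_experiments', fields=['config', 'result'],
                      states=['COMPLETED'])
# Note that filter_dict={} is a shared mutable default: the function writes
# filter_dict['status'] into it whenever states is non-empty, so passing an
# explicit dict is safer than relying on the default.
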
    # Fall back to no conda environment if none was configured.
    if 'conda_environment' not in seml_config:
        seml_config['conda_environment'] = None
    # Set Slurm config with default parameters as fall-back option
    if slurm_config is None:
        slurm_config = {'sbatch_options': {}}
    for k, v in SETTINGS.SLURM_DEFAULT['sbatch_options'].items():
        if k not in slurm_config['sbatch_options']:
            slurm_config['sbatch_options'][k] = v
    del SETTINGS.SLURM_DEFAULT['sbatch_options']
    for k, v in SETTINGS.SLURM_DEFAULT.items():
        if k not in slurm_config:
            slurm_config[k] = v

    slurm_config['sbatch_options'] = remove_prepended_dashes(slurm_config['sbatch_options'])

    collection = get_collection(db_collection_name)
    configs = generate_configs(experiment_config)

    # Assign the next batch ID: one greater than the current maximum in the collection.
    batch_id = get_max_in_collection(collection, "batch_id")
    if batch_id is None:
        batch_id = 1
    else:
        batch_id = batch_id + 1

    if seml_config['use_uploaded_sources']:
        uploaded_files = upload_sources(seml_config, collection, batch_id)
    else:
        uploaded_files = None

    if not no_config_check:
        check_config(seml_config['executable'], seml_config['conda_environment'], configs)
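
# A minimal sketch of the two-level default merge above (the SLURM_DEFAULT
# values shown are hypothetical): user-provided sbatch options win over the
# defaults, and top-level keys are merged the same way.
#
#     SETTINGS.SLURM_DEFAULT = {'experiments_per_job': 1,
#                               'sbatch_options': {'time': '0-08:00', 'mem': '8G'}}
#     slurm_config = {'sbatch_options': {'mem': '16G'}}
#     # After merging:
#     # {'experiments_per_job': 1,
#     #  'sbatch_options': {'mem': '16G', 'time': '0-08:00'}}
#
# Note that del SETTINGS.SLURM_DEFAULT['sbatch_options'] mutates the global
# settings object, so this merge can only run once per process.
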
parser.add_argument("--experiment_id", type=int, help="The experiment ID.")
parser.add_argument("--db_collection_name", type=str, help="The collection in the database to use.")
parser.add_argument("--verbose", default=False, type=lambda x: (str(x).lower() == 'true'),
help="Display more log messages.")
parser.add_argument("--unobserved", default=False, type=lambda x: (str(x).lower() == 'true'),
help="Run the experiments without Sacred observers.")
parser.add_argument("--post-mortem", default=False, type=lambda x: (str(x).lower() == 'true'),
help="Activate post-mortem debugging with pdb.")
parser.add_argument("--stored-sources-dir", default=None, type=str,
help="Load source files into this directory before starting.")
args = parser.parse_args()
exp_id = args.experiment_id
db_collection_name = args.db_collection_name
collection = get_collection(db_collection_name)
exp = collection.find_one({'_id': exp_id})
use_stored_sources = args.stored_sources_dir is not None
if use_stored_sources and not os.listdir(args.stored_sources_dir):
assert "source_files" in exp['seml'],\
"--stored-sources-dir was supplied but queued experiment does not contain stored source files."
load_sources_from_db(exp, collection, to_directory=args.stored_sources_dir)
exe, config = get_command_from_exp(exp, db_collection_name, verbose=args.verbose,
unobserved=args.unobserved, post_mortem=args.post_mortem)
config_args = ' '.join(config)
cmd = f"python {exe} with {config_args}"
if use_stored_sources:
# add command without the temp_dir prefix
# also add the temp dir for debugging purposes
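
# A hedged sketch of what the two comments above describe (the variable names
# below are assumptions, not seml's actual code): keep the original command for
# debugging, and run the executable from the directory the sources were loaded
# into.
#
#     cmd_unresolved = cmd                      # command without the temp dir prefix
#     cmd = (f"cd {args.stored_sources_dir}; "  # run from the loaded sources
#            f"python {exe} with {config_args}")
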
    num_exps: int
        If >0, will only submit the specified number of experiments to the cluster.
        This is useful when you only want to test your setup.
    filter_dict: dict
        Dictionary for filtering the entries in the collection.
    dry_run: bool
        Just return the executables and configurations instead of running them.
    output_to_file: bool
        Pipe all output (stdout and stderr) to an output file.
        Can only be False if slurm is False.

    Returns
    -------
    None
    """
    collection = get_collection(db_collection_name)

    if unobserved and not slurm and '_id' in filter_dict:
        query_dict = {}
    else:
        query_dict = {'status': {"$in": ['QUEUED']}}
    query_dict.update(filter_dict)

    if collection.count_documents(query_dict) <= 0:
        logging.error("No queued experiments.")
        return

    exps_full = list(collection.find(query_dict))
    nexps = num_exps if num_exps > 0 else len(exps_full)
    exps_list = exps_full[:nexps]
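
# Usage sketch: the surrounding fragment appears to come from seml's function
# for starting queued experiments (the name and full signature below are
# assumptions based on the docstring and body above). Dry-run the first two
# queued experiments to inspect their commands without executing them:
start_experiments('my_experiments', slurm=False, unobserved=False,
                  num_exps=2, filter_dict={}, dry_run=True, output_to_file=False)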