How to use the seml.utils.s_if function in seml

To help you get started, we’ve selected a few seml examples based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github TUM-DAML / seml / seml / start.py View on Github external
# NOTE(review): fragment of a larger function in seml/start.py — the
# enclosing `def` and the `if`/`elif` that pairs with the `else:` below are
# not visible here, and the first line's indentation was lost by the scrape.
for exp_array in exp_arrays:
            # Slurm settings are read from the first experiment of the first
            # chunk — presumably shared by the whole array; TODO confirm.
            job_name = get_exp_name(exp_array[0][0], collection.name)
            output_dir_path = get_output_dir_path(exp_array[0][0])
            slurm_config = exp_array[0][0]['slurm']
            # Removed before the dict is forwarded as **kwargs — presumably
            # not a valid start_slurm_job keyword; verify against its signature.
            del slurm_config['experiments_per_job']
            start_slurm_job(collection, exp_array, unobserved, post_mortem,
                            name=job_name, output_dir_path=output_dir_path, **slurm_config)
    else:
        # Local (non-Slurm) path: refuse to run compute work on a login node,
        # identified by a substring of the hostname.
        login_node_name = 'fs'
        if login_node_name in os.uname()[1]:
            logging.error("Refusing to run a compute experiment on a login node. "
                          "Please use Slurm or a compute node.")
            sys.exit(1)
        # Evaluated for side effects only — fails early on bad output dirs.
        [get_output_dir_path(exp) for exp in exps_list]  # Check if output dir exists
        logging.info(f'Starting local worker thread that will run up to {nexps} experiment{s_if(nexps)}, '
                     f'until no queued experiments remain.')
        if not unobserved:
            # Mark all selected experiments PENDING in one bulk update.
            collection.update_many({'_id': {'$in': [e['_id'] for e in exps_list]}}, {"$set": {"status": "PENDING"}})
        num_exceptions = 0
        tq = tqdm(enumerate(exps_list))
        for i_exp, exp in tq:
            if output_to_file:
                output_dir_path = get_output_dir_path(exp)
            else:
                output_dir_path = None
            success = start_local_job(collection, exp, unobserved, post_mortem, output_dir_path)
            # Explicit `is False` check: count only definite failures.
            if success is False:
                num_exceptions += 1
            tq.set_postfix(failed=f"{num_exceptions}/{i_exp} experiments")
github TUM-DAML / seml / seml / manage.py View on Github external
# NOTE(review): fragment of a deletion function in seml/manage.py — the
# `def` line is not visible; `sacred_id`, `filter_states`, `batch_id` and
# `filter_dict` come from the unseen signature.
collection = get_collection(db_collection_name)
    if sacred_id is None:
        # Bulk deletion by filter. If any volatile state may be deleted,
        # run detect_killed first — presumably to refresh stale RUNNING
        # entries before counting; confirm.
        if len({'PENDING', 'RUNNING', 'KILLED'} & set(filter_states)) > 0:
            detect_killed(db_collection_name, print_detected=False)

        filter_dict = build_filter_dict(filter_states, batch_id, filter_dict)
        ndelete = collection.count_documents(filter_dict)
        # Record the batch ids of the documents about to be removed so
        # orphaned sources can be cleaned up afterwards.
        batch_ids = collection.find(filter_dict, {'batch_id'})
        batch_ids_in_del = set([x['batch_id'] for x in batch_ids])

        # Interactive confirmation guard for large (>= 10) deletions.
        if ndelete >= 10:
            if input(f"Deleting {ndelete} configuration{s_if(ndelete)} from database collection. "
                     f"Are you sure? (y/n) ").lower() != "y":
                exit()
        else:
            logging.info(f"Deleting {ndelete} configuration{s_if(ndelete)} from database collection.")
        collection.delete_many(filter_dict)
    else:
        # Single-experiment deletion addressed by Sacred ID.
        exp = collection.find_one({'_id': sacred_id})
        if exp is None:
            logging.error(f"No experiment found with ID {sacred_id}.")
            sys.exit(1)
        else:
            logging.info(f"Deleting experiment with ID {sacred_id}.")
            batch_ids_in_del = set([exp['batch_id']])
            collection.delete_one({'_id': sacred_id})

    if len(batch_ids_in_del) > 0:
        # clean up the uploaded sources if no experiments of a batch remain
        delete_orphaned_sources(collection, batch_ids_in_del)
github TUM-DAML / seml / seml / start.py View on Github external
# NOTE(review): fragment of seml/start.py — begins mid-loop; the code that
# builds `configs` and the enclosing function signature are not visible,
# and the first line's indentation was lost by the scrape.
if 'conda_environment' in exp['seml']:
                configs.append((exe, exp['seml']['conda_environment'], config))
            else:
                # No conda environment configured: placeholder None.
                configs.append((exe, None, config))
        return configs
    elif slurm:
        if not output_to_file:
            logging.error("Output cannot be written to stdout in Slurm mode. "
                          "Remove the '--output-to-console' argument.")
            sys.exit(1)
        # Group experiments into chunks (one Slurm job each), then batch the
        # chunks into Slurm job arrays.
        exp_chunks = chunk_list(exps_list)
        exp_arrays = batch_chunks(exp_chunks)
        njobs = len(exp_chunks)
        narrays = len(exp_arrays)

        logging.info(f"Starting {nexps} experiment{s_if(nexps)} in "
                     f"{njobs} Slurm job{s_if(njobs)} in {narrays} Slurm job array{s_if(narrays)}.")

        for exp_array in exp_arrays:
            # Slurm settings come from the first experiment of the first
            # chunk — presumably identical across the array; TODO confirm.
            job_name = get_exp_name(exp_array[0][0], collection.name)
            output_dir_path = get_output_dir_path(exp_array[0][0])
            slurm_config = exp_array[0][0]['slurm']
            # Dropped before forwarding as **kwargs — presumably not a
            # start_slurm_job keyword; verify.
            del slurm_config['experiments_per_job']
            start_slurm_job(collection, exp_array, unobserved, post_mortem,
                            name=job_name, output_dir_path=output_dir_path, **slurm_config)
    else:
        # Local path: never run compute experiments on a login node.
        login_node_name = 'fs'
        if login_node_name in os.uname()[1]:
            logging.error("Refusing to run a compute experiment on a login node. "
                          "Please use Slurm or a compute node.")
            sys.exit(1)
        # Evaluated for side effects only — fails early on bad output dirs.
        [get_output_dir_path(exp) for exp in exps_list]  # Check if output dir exists
github TUM-DAML / seml / seml / manage.py View on Github external
def report_status(db_collection_name):
    """Log a per-status count report for one database collection.

    Runs the killed-experiment detection first so the counts reflect the
    current state, then emits one log line per status plus a framed title.
    """
    detect_killed(db_collection_name, print_detected=False)
    collection = get_collection(db_collection_name)
    # Issue one count query per status, in the same order as before.
    counts = {
        state: collection.count_documents({'status': state})
        for state in ('QUEUED', 'PENDING', 'FAILED', 'KILLED',
                      'INTERRUPTED', 'RUNNING', 'COMPLETED')
    }
    title = f"********** Report for database collection '{db_collection_name}' **********"
    logging.info(title)
    # Display order is intentionally different from the query order above.
    display = (('QUEUED', 'queued'), ('PENDING', 'pending'),
               ('RUNNING', 'running'), ('COMPLETED', 'completed'),
               ('INTERRUPTED', 'interrupted'), ('FAILED', 'failed'),
               ('KILLED', 'killed'))
    for state, label in display:
        count = counts[state]
        logging.info(f"*     - {count:3d} {label} experiment{s_if(count)}")
    logging.info("*" * len(title))
github TUM-DAML / seml / seml / manage.py View on Github external
def report_status(db_collection_name):
    """Print a status summary for the given collection via logging.

    Stale RUNNING experiments are re-classified by detect_killed before
    counting, so the report reflects the current database state.
    """
    detect_killed(db_collection_name, print_detected=False)
    coll = get_collection(db_collection_name)

    def _count(state):
        # One MongoDB count query per status value.
        return coll.count_documents({'status': state})

    queued = _count('QUEUED')
    pending = _count('PENDING')
    failed = _count('FAILED')
    killed = _count('KILLED')
    interrupted = _count('INTERRUPTED')
    running = _count('RUNNING')
    completed = _count('COMPLETED')
    header = f"********** Report for database collection '{db_collection_name}' **********"
    logging.info(header)
    # Emit the rows in display order (differs from the query order above).
    rows = ((queued, 'queued'), (pending, 'pending'), (running, 'running'),
            (completed, 'completed'), (interrupted, 'interrupted'),
            (failed, 'failed'), (killed, 'killed'))
    for n, label in rows:
        logging.info(f"*     - {n:3d} {label} experiment{s_if(n)}")
    logging.info("*" * len(header))
github TUM-DAML / seml / seml / manage.py View on Github external
# NOTE(review): fragment of report_status from seml/manage.py — the `def`
# line and the initial detect_killed call are not visible in this snippet,
# and the first line's indentation was lost by the scrape.
collection = get_collection(db_collection_name)
    # One count query per status value.
    queued = collection.count_documents({'status': 'QUEUED'})
    pending = collection.count_documents({'status': 'PENDING'})
    failed = collection.count_documents({'status': 'FAILED'})
    killed = collection.count_documents({'status': 'KILLED'})
    interrupted = collection.count_documents({'status': 'INTERRUPTED'})
    running = collection.count_documents({'status': 'RUNNING'})
    completed = collection.count_documents({'status': 'COMPLETED'})
    title = f"********** Report for database collection '{db_collection_name}' **********"
    logging.info(title)
    # Display order (queued → killed) differs from the query order above.
    logging.info(f"*     - {queued:3d} queued experiment{s_if(queued)}")
    logging.info(f"*     - {pending:3d} pending experiment{s_if(pending)}")
    logging.info(f"*     - {running:3d} running experiment{s_if(running)}")
    logging.info(f"*     - {completed:3d} completed experiment{s_if(completed)}")
    logging.info(f"*     - {interrupted:3d} interrupted experiment{s_if(interrupted)}")
    logging.info(f"*     - {failed:3d} failed experiment{s_if(failed)}")
    logging.info(f"*     - {killed:3d} killed experiment{s_if(killed)}")
    # Closing frame matches the title's width.
    logging.info("*" * len(title))
github TUM-DAML / seml / seml / manage.py View on Github external
# NOTE(review): fragment of a killed-experiment detector in seml/manage.py —
# begins mid-try; `slurm_config`, `seml_config`, `exp` and `nkilled` are
# defined in the unseen enclosing code.
# Backward compatibility, we used to store the path in 'slurm'
                        output_file = slurm_config['output_file']
                    else:
                        continue
                    with open(output_file, 'r') as f:
                        all_lines = f.readlines()
                    # Persist the last four log lines as the failure trace.
                    collection.update_one({'_id': exp['_id']}, {'$set': {'fail_trace': all_lines[-4:]}})
                except IOError:
                    # Log file unreadable: resolve its path again purely to
                    # name it in the warning, then continue without a trace.
                    if 'output_file' in seml_config:
                        output_file = seml_config['output_file']
                    elif 'output_file' in slurm_config:
                        # Backward compatibility
                        output_file = slurm_config['output_file']
                    logging.warning(f"File {output_file} could not be read.")
    if print_detected:
        logging.info(f"Detected {nkilled} externally killed experiment{s_if(nkilled)}.")