How to use the dsub.lib.dsub_util module in dsub

To help you get started, we've selected a few examples of dsub_util usage from the dsub codebase, based on popular ways it is used in public projects.
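
Before the snippets, here is a minimal sketch of two dsub_util calls that recur throughout them. It assumes dsub and python-dateutil are installed, and is illustrative rather than authoritative:

import datetime

from dateutil.tz import tzlocal

from dsub.lib import dsub_util

# Produce a timezone-aware "now", as dsub does before recording create-time.
create_time = dsub_util.replace_timezone(datetime.datetime.now(), tzlocal())

# The OS login name of the submitting user.
user_id = dsub_util.get_os_user()

print(user_id, create_time.isoformat())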

From DataBiosphere/dsub: dsub/providers/test_fails.py

    if not skip_if_output_present:
      raise FailsException("fails provider made submit_job fail")

    for task_view in job_model.task_view_generator(job_descriptor):
      job_params = task_view.job_params
      task_params = task_view.task_descriptors[0].task_params

      outputs = job_params["outputs"] | task_params["outputs"]
      if dsub_util.outputs_are_present(outputs):
        print("Skipping task because its outputs are present")
        continue

      # if any task is allowed to run, then we fail.
      raise FailsException("fails provider made submit_job fail")

    return {"job-id": dsub_util.NO_JOB}
From DataBiosphere/dsub: dsub/commands/dsub.py

def _get_job_metadata(provider, user_id, job_name, script, task_ids,
                      user_project, unique_job_id):
  """Allow provider to extract job-specific metadata from command-line args.

  Args:
    provider: job service provider
    user_id: user submitting the job
    job_name: name for the job
    script: the script to run
    task_ids: a set of the task-ids for all tasks in the job
    user_project: name of the project to be billed for the request
    unique_job_id: generate a unique job id

  Returns:
    A dictionary of job-specific metadata (such as job id, name, etc.)
  """
  create_time = dsub_util.replace_timezone(datetime.datetime.now(), tzlocal())
  user_id = user_id or dsub_util.get_os_user()
  job_metadata = provider.prepare_job_metadata(script.name, job_name, user_id,
                                               create_time)
  if unique_job_id:
    job_metadata['job-id'] = uuid.uuid4().hex

  job_metadata['create-time'] = create_time
  job_metadata['script'] = script
  job_metadata['user-project'] = user_project
  if task_ids:
    job_metadata['task-ids'] = dsub_util.compact_interval_string(list(task_ids))

  return job_metadata
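
Three dsub_util helpers carry this function: replace_timezone() makes the naive datetime timezone-aware, get_os_user() supplies a default user id, and compact_interval_string() collapses the task ids into a short range string (judging from its name and usage here; the exact output format is an assumption). In isolation:

import datetime

from dateutil.tz import tzlocal

from dsub.lib import dsub_util

create_time = dsub_util.replace_timezone(datetime.datetime.now(), tzlocal())
user_id = dsub_util.get_os_user()

# Collapse a list of task ids into a compact interval string.
task_ids = dsub_util.compact_interval_string([1, 2, 3, 5])

print({'create-time': create_time, 'user-id': user_id, 'task-ids': task_ids})
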
From DataBiosphere/dsub: dsub/providers/google.py

    def _desc_date_sort_key(t):
      return now - dsub_util.replace_timezone(t.get_field('create-time'), None)
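
The key works because replace_timezone(value, None) strips the tzinfo, so the result can be subtracted from a naive now; the newest entry produces the smallest timedelta and sorts first. A standalone sketch with made-up datetimes in place of provider task objects:

import datetime

from dateutil.tz import tzlocal

from dsub.lib import dsub_util

now = datetime.datetime.now()

def desc_date_sort_key(create_time):
  # Stripping the timezone makes the subtraction against a naive "now" valid;
  # a smaller timedelta means a more recent time, so ascending sort order
  # yields newest-first results.
  return now - dsub_util.replace_timezone(create_time, None)

times = [dsub_util.replace_timezone(datetime.datetime(2020, 1, d), tzlocal())
         for d in (1, 3, 2)]
for t in sorted(times, key=desc_date_sort_key):
  print(t)
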
From DataBiosphere/dsub: dsub/commands/dsub.py

      print('Waiting for predecessor jobs to complete...')
      error_messages = _wait_after(provider, after, poll_interval, True)
      if error_messages:
        for msg in error_messages:
          print_error(msg)
        raise dsub_errors.PredecessorJobFailureError(
            'One or more predecessor jobs completed but did not succeed.',
            error_messages, None)

  # Launch all the job tasks!
  job_descriptor = job_model.JobDescriptor(job_metadata, job_params,
                                           job_resources, task_descriptors)
  launched_job = provider.submit_job(job_descriptor, skip)

  if not dry_run:
    if launched_job['job-id'] == dsub_util.NO_JOB:
      print('Job output already present, skipping new job submission.')
      return {'job-id': dsub_util.NO_JOB}
    print('Launched job-id: %s' % launched_job['job-id'])
    if launched_job.get('task-id'):
      print('%s task(s)' % len(launched_job['task-id']))
    print('To check the status, run:')
    print("  dstat %s --jobs '%s' --users '%s' --status '*'" %
          (provider_base.get_dstat_provider_args(provider, project),
           launched_job['job-id'], launched_job['user-id']))
    print('To cancel the job, run:')
    print("  ddel %s --jobs '%s' --users '%s'" %
          (provider_base.get_ddel_provider_args(provider, project),
           launched_job['job-id'], launched_job['user-id']))

  # Poll for job completion
  if wait:
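
The dictionary returned by submit_job() drives all of the console output above; condensed into a reusable form (report_launch is a hypothetical name):

from dsub.lib import dsub_util

def report_launch(launched_job):
  # A job-id of NO_JOB means every task was skipped because its outputs
  # were already present.
  if launched_job['job-id'] == dsub_util.NO_JOB:
    print('Job output already present, skipping new job submission.')
    return
  print('Launched job-id: %s' % launched_job['job-id'])
  if launched_job.get('task-id'):
    print('%s task(s)' % len(launched_job['task-id']))
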
From DataBiosphere/dsub: dsub/providers/local.py

    # 'OR' filtering arguments.
    statuses = None if statuses == {'*'} else statuses
    user_ids = None if user_ids == {'*'} else user_ids
    job_ids = None if job_ids == {'*'} else job_ids
    job_names = None if job_names == {'*'} else job_names
    task_ids = None if task_ids == {'*'} else task_ids
    task_attempts = None if task_attempts == {'*'} else task_attempts
    # 'AND' filtering arguments.
    labels = labels if labels else {}

    # The local provider is intended for local, single-user development. There
    # is no shared queue (jobs run immediately) and hence it makes no sense
    # to look up a job run by someone else (whether for dstat or for ddel).
    # If a user is passed in, we will allow it, so long as it is the current
    # user. Otherwise we explicitly error out.
    approved_users = {dsub_util.get_os_user()}
    if user_ids:
      if user_ids != approved_users:
        raise NotImplementedError(
            'Filtering by user is not implemented for the local provider'
            ' (%s)' % str(user_ids))
    else:
      user_ids = approved_users

    ret = []
    if not job_ids:
      # Default to every job we know about.
      job_ids = os.listdir(self._provider_root())
    for j in job_ids:
      for u in user_ids:
        path = self._provider_root() + '/' + j
        if not os.path.isdir(path):
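
The user check reduces to comparing the requested set against the single user returned by dsub_util.get_os_user(). A compact equivalent (resolve_user_ids is a hypothetical name):

from dsub.lib import dsub_util

def resolve_user_ids(user_ids):
  # The local provider only tracks jobs run by the current OS user.
  approved_users = {dsub_util.get_os_user()}
  if user_ids and user_ids != approved_users:
    raise NotImplementedError(
        'Filtering by user is not implemented for the local provider'
        ' (%s)' % str(user_ids))
  return user_ids or approved_users
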
From DataBiosphere/dsub: dsub/lib/job_model.py

    Returns:
      A JobDescriptor populated as best we can from the old meta.yaml.
    """

    # The v0 meta.yaml only contained:
    #   create-time, job-id, job-name, logging, task-id
    #   labels, envs, inputs, outputs
    # It did NOT contain user-id.
    # dsub-version might be there as a label.

    job_metadata = {}
    for key in ['job-id', 'job-name', 'create-time']:
      job_metadata[key] = job.get(key)

    # Make sure that create-time string is turned into a datetime
    job_metadata['create-time'] = dsub_util.replace_timezone(
        datetime.datetime.strptime(job['create-time'], '%Y-%m-%d %H:%M:%S.%f'),
        tzlocal())

    # The v0 meta.yaml contained a "logging" field which was the task-specific
    # logging path. It did not include the actual "--logging" value the user
    # specified.
    job_resources = Resources()

    # The v0 meta.yaml represented a single task.
    # It did not distinguish whether params were job params or task params.
    # We will treat them as either all job params or all task params, based on
    # whether the task-id is empty or an integer value.
    #
    # We also cannot distinguish whether inputs/outputs were recursive or not.
    # Just treat them all as non-recursive.
    params = {}
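
The only dsub_util call in this migration path turns the v0 create-time string into a timezone-aware datetime. In isolation, with a made-up timestamp:

import datetime

from dateutil.tz import tzlocal

from dsub.lib import dsub_util

# A made-up v0-style create-time string; v0 stored naive local times.
raw = '2019-05-01 12:34:56.789000'
create_time = dsub_util.replace_timezone(
    datetime.datetime.strptime(raw, '%Y-%m-%d %H:%M:%S.%f'), tzlocal())
print(create_time.isoformat())
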
From DataBiosphere/dsub: dsub/commands/dsub.py

def dsub_main(prog, argv):
  # Parse args and validate
  args = _parse_arguments(prog, argv)
  # intent:
  # * dsub tightly controls the output to stdout.
  # * wrap the main body such that output goes to stderr.
  # * only emit the job-id to stdout (which can then be used programmatically).
  with dsub_util.replace_print():
    launched_job = run_main(args)
  print(launched_job.get('job-id', ''))
  return launched_job
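
dsub_util.replace_print() is a context manager that redirects printed output (to stderr, per the comments above) so that only the job-id reaches stdout, which is what lets a shell script capture it programmatically. A sketch of the same pattern for any wrapper (run_quietly is a hypothetical name; fn is assumed to return a dict like run_main does):

from dsub.lib import dsub_util

def run_quietly(fn, *args, **kwargs):
  # Anything printed inside the block is diverted away from stdout,
  # keeping stdout reserved for the job-id alone.
  with dsub_util.replace_print():
    result = fn(*args, **kwargs)
  print(result.get('job-id', ''))
  return result
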
From DataBiosphere/dsub: dsub/lib/param_util.py

  Otherwise the value must be of the form "<integer><unit>" where supported
  units are s, m, h, d, w (seconds, minutes, hours, days, weeks).

  Args:
    age: A "" string or integer value.
    from_time:

  Returns:
    A timezone-aware datetime or None if age parameter is empty.
  """

  if not age:
    return None

  if not from_time:
    from_time = dsub_util.replace_timezone(datetime.datetime.now(), tzlocal())

  try:
    last_char = age[-1]

    if last_char == 's':
      return from_time - datetime.timedelta(seconds=int(age[:-1]))
    elif last_char == 'm':
      return from_time - datetime.timedelta(minutes=int(age[:-1]))
    elif last_char == 'h':
      return from_time - datetime.timedelta(hours=int(age[:-1]))
    elif last_char == 'd':
      return from_time - datetime.timedelta(days=int(age[:-1]))
    elif last_char == 'w':
      return from_time - datetime.timedelta(weeks=int(age[:-1]))
    else:
      # If no unit is given, treat the age as seconds from the epoch.
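
Each branch simply subtracts the corresponding timedelta from a timezone-aware reference time. The '3d' case, written out inline:

import datetime

from dateutil.tz import tzlocal

from dsub.lib import dsub_util

# Equivalent of an age of '3d': three days before a timezone-aware "now".
from_time = dsub_util.replace_timezone(datetime.datetime.now(), tzlocal())
create_time = from_time - datetime.timedelta(days=3)
print(create_time)
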
From DataBiosphere/dsub: dsub/lib/param_util.py

  Returns:
    task_descriptors: an array of records, each containing the task-id,
    task-attempt, 'envs', 'inputs', 'outputs', 'labels' that defines the set of
    parameters for each task of the job.

  Raises:
    ValueError: If no job records were provided
  """
  task_descriptors = []

  path = tasks['path']
  task_min = tasks.get('min')
  task_max = tasks.get('max')

  # First check for any empty lines
  param_file = dsub_util.load_file(path)
  if any([not line for line in param_file]):
    raise ValueError('Blank line(s) found in {}'.format(path))
  param_file.close()

  # Load the file and set up a Reader that tokenizes the fields
  param_file = dsub_util.load_file(path)
  reader = csv.reader(param_file, delimiter='\t')

  # Read the first line and extract the parameters
  header = six.advance_iterator(reader)
  job_params = parse_tasks_file_header(header, input_file_param_util,
                                       output_file_param_util)

  # Build a list of records from the parsed input file
  for row in reader:
    # Tasks are numbered starting at 1 and since the first line of the TSV
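
dsub_util.load_file() is opened twice above: once to scan for blank lines and once to feed csv.reader. A condensed sketch of the same two passes (the tasks-file path is a placeholder):

import csv

from dsub.lib import dsub_util

path = 'tasks.tsv'  # placeholder path to a tasks file

# Pass 1: blank lines would break task numbering, so reject them up front.
param_file = dsub_util.load_file(path)
if any(not line for line in param_file):
  raise ValueError('Blank line(s) found in {}'.format(path))
param_file.close()

# Pass 2: tokenize the TSV; the first row is the parameter header.
param_file = dsub_util.load_file(path)
reader = csv.reader(param_file, delimiter='\t')
header = next(reader)
print(header)
param_file.close()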