How to use the dxpy.find_data_objects function in dxpy

To help you get started, we’ve selected a few dxpy examples that show popular ways `dxpy.find_data_objects` is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ENCODE-DCC / chip-seq-pipeline / dnanexus / call_peaks_from_ta.py View on Github external
def get_tas(exp_id, default_project, ta_folders):
	"""Search DNAnexus folders for the tagAlign files of one experiment.

	Each entry of ``ta_folders`` is either ``"<project name>:<path>"`` or a
	bare ``<path>``; bare paths are searched in ``default_project``.  Every
	closed file found recursively under the path is kept when its folder
	contains both ``exp_id`` and ``'/bams'`` and its name ends in
	``tagAlign`` or ``tagAlign.gz``.  The kept file descriptions are then
	split by replicate using the ``'rep1'`` / ``'rep2'`` substring of the
	folder.

	NOTE(review): this excerpt is truncated after ``rep1`` is computed; the
	handling of ``rep2_files`` and the return value are not visible here.
	NOTE(review): Python 2 source (``print`` statements).
	"""
	possible_files = []
	for base_folder in ta_folders:
		if ':' in base_folder:
			# NOTE(review): a base_folder with more than one ':' makes this
			# two-name unpacking raise ValueError.
			project_name, path = base_folder.split(':')
			# resolve_project is defined outside this excerpt; the object it
			# returns is only used for get_id() here.
			project = resolve_project(project_name)
			project = project.get_id()
			# Keep the trailing ':' so the name can be printed as a prefix.
			project_name += ":"
		else:
			project = default_project
			project_name = ""
			path = base_folder
		# Make the folder path absolute before handing it to the search.
		if not path.startswith('/'):
			path = '/' + path
		print project, project_name, path
		# describe=True attaches each object's description under 'describe'.
		for dxfile in dxpy.find_data_objects(classname='file', state='closed', folder=path, describe=True, recurse=True, project=project):
			desc = dxfile.get('describe')
			# Substring checks on the folder, suffix check on the file name.
			if exp_id in desc.get('folder') and '/bams' in desc.get('folder') and desc.get('name').endswith(('tagAlign', 'tagAlign.gz')):
				possible_files.append(desc)
	print "%s %i possible files" %(exp_id, len(possible_files))
	# Partition the candidates by replicate, based on the folder name.
	rep1_files = [f for f in possible_files if 'rep1' in f.get('folder')]
	rep2_files = [f for f in possible_files if 'rep2' in f.get('folder')]

	# Exactly one rep1 file is expected; anything else is reported, and when
	# several are found the first one is used.
	if len(rep1_files) != 1:
		print "Tried to find one rep1 ta, found %d" %(len(rep1_files))
	if len(rep1_files) > 0:
		if len(rep1_files) > 1:
			print "Using first one found"
		# Build a "<project>:<folder>/<name>" reference from the description.
		rep1 = rep1_files[0].get('project') + ':' + rep1_files[0].get('folder') + '/' + rep1_files[0].get('name')
	else:
		rep1 = None
github ENCODE-DCC / chip-seq-pipeline / dnanexus / call_chip_from_tas.py View on Github external
if ':' in base_folder:
            project_name, path = base_folder.split(':')
            project = resolve_project(project_name)
            project_id = project.get_id()
            project_name += ":"
        else:
            project_id = default_project
            project_name = ""
            path = base_folder
        if not path.startswith('/'):
            path = '/' + path
        if not path.endswith('/'):
            path += '/'
        logging.debug(
            "Looking for TA's in %s %s %s" % (project_id, project_name, path))
        for dxfile in dxpy.find_data_objects(
            classname='file',
            state='closed',
            folder=path + 'bams/',
            project=project_id,
            describe=True,
            recurse=True,
            name='*tagAlign.gz',
            name_mode='glob'
        ):
            possible_files.append(dxfile.get('describe'))
    matched_files = \
        [f for f in possible_files if all([acc in f['name'] for acc in accessions])]
    if not matched_files:
        logging.error(
            'Could not find tagAlign with accessions %s' % (accessions))
        return None
github ENCODE-DCC / chip-seq-pipeline / dnanexus / call_chip_from_tas.py View on Github external
if ':' in base_folder:
            project_name, path = base_folder.split(':')
            project = resolve_project(project_name)
            project_id = project.get_id()
            project_name += ":"
        else:
            project_id = default_project
            project_name = ""
            path = base_folder
        if not path.startswith('/'):
            path = '/' + path
        if not path.endswith('/'):
            path += '/'
        logging.debug(
            "Looking for TA's in %s %s %s" % (project_id, project_name, path))
        for dxfile in dxpy.find_data_objects(
            classname='file',
            state='closed',
            folder=path + 'bams/',
            project=project_id,
            describe=True,
            recurse=True,
            name='*tagAlign.gz',
            name_mode='glob'
        ):
            possible_files.append(dxfile.get('describe'))
    matched_files = \
        [f for f in possible_files if all([acc in f['name'] for acc in accessions])]
    if not matched_files:
        logging.error(
            'Could not find tagAlign with accessions %s' % (accessions))
        return None
github dnanexus / dxWDL / scripts / build_release.py View on Github external
dest_proj.new_folder(folder, parents=True)
        region2projid[region] = dest_proj.get_id()
    print(region2projid)

    # Fire off a clone process for each region
    # Wait for the cloning to complete
    for i in [1, 2, 3]:
        jobs = _clone_to_all_regions(region2projid, regions, asset_file_name, folder, url)
        retval = _wait_for_completion(jobs)
        if retval:
            break

    # make records for each file
    for region in regions:
        dest_proj_id = region2projid[region]
        # Look up the cloned asset file in this region's project.  The search
        # is restricted to hidden files with the asset's name in `folder`.
        results = list(dxpy.find_data_objects(classname = "file",
                                              visibility = "hidden",
                                              name = asset_file_name,
                                              project = dest_proj_id,
                                              folder = folder))
        file_ids = [p["id"] for p in results]
        # Exactly one matching file is required to build the asset record.
        if not file_ids:
            raise RuntimeError("Found no files {}:{}/{}".format(dest_proj_id, folder, asset_file_name))
        if len(file_ids) > 1:
            # Report the size of the list that was actually tested; the
            # previous code formatted len(dxfiles), a name not bound in the
            # visible code, which would mask this error with a NameError.
            raise RuntimeError("Found {} files {}:{}/{}, instead of just one"
                               .format(len(file_ids), dest_proj_id, folder, asset_file_name))
        dest_asset = dxpy.new_dxrecord(name=record.name,
                                       types=['AssetBundle'],
                                       details={'archiveFileId': dxpy.dxlink(file_ids[0])},
                                       properties=record.get_properties(),
                                       project=dest_proj_id,
                                       folder=folder,
github dnanexus / dx-toolkit / src / python / dxpy / app_builder.py View on Github external
applet_spec['folder'] = override_folder
    if 'folder' not in applet_spec:
        applet_spec['folder'] = '/'

    if override_name:
        applet_spec['name'] = override_name

    if 'dxapi' not in applet_spec:
        applet_spec['dxapi'] = dxpy.API_VERSION

    applets_to_overwrite = []
    archived_applet = None
    if check_name_collisions and not dry_run:
        destination_path = applet_spec['folder'] + ('/' if not applet_spec['folder'].endswith('/') else '') + applet_spec['name']
        logger.debug("Checking for existing applet at " + destination_path)
        for result in dxpy.find_data_objects(classname="applet", name=applet_spec["name"], folder=applet_spec['folder'], project=dest_project, recurse=False):
            if overwrite:
                # Don't remove the old applet until after the new one
                # has been created. This avoids a race condition where
                # we remove the old applet, but that causes garbage
                # collection of the bundled resources that will be
                # shared with the new applet
                applets_to_overwrite.append(result['id'])
            elif archive:
                logger.debug("Archiving applet %s" % (result['id']))
                proj = dxpy.DXProject(dest_project)
                archive_folder = '/.Applet_archive'
                try:
                    proj.list_folder(archive_folder)
                except dxpy.DXAPIError:
                    proj.new_folder(archive_folder)
github ENCODE-DCC / chip-seq-pipeline / dnanexus / call_chip_from_tas.py View on Github external
project = resolve_project(project_name)
            project = project.get_id()
            project_name += ":"
        else:
            project = default_project
            project_name = ""
            base_path = base_folder
        if not base_path.startswith('/'):
            base_path = '/' + base_path
        if not base_path.endswith('/'):
            base_path = base_path + '/'
        path = base_path + 'bams/' + exp_id + '/'
        logging.debug(
            "get_all_tas: find_data objects in project %s project_name %s path %s"
            % (project, project_name, path))
        for dxfile in dxpy.find_data_objects(classname='file', state='closed', folder=path, describe=True, recurse=True, project=project):
            desc = dxfile.get('describe')
            logging.debug(
                "get_all_tas: checking object for match: folder %s name %s"
                % (desc.get('folder'), desc.get('name')))
            if exp_id in desc.get('folder') and '/bams' in desc.get('folder') and desc.get('name').endswith(('tagAlign', 'tagAlign.gz')):
                possible_files.append(desc)
    logging.debug(
        "get_all_tas: exit with possible_files %s" % (possible_files))
    return possible_files
github dnanexus / dx-toolkit / src / python / dxpy / cli / exec_io.py View on Github external
'List and choose from available data in the DNAnexus Reference Genomes project',
                                'Select another project to list and choose available data',
                                'Select an output from a previously-run job (current project only)',
                                'Return to original prompt (specify an ID or path directly)'])
            except KeyboardInterrupt:
                opt_num = 4
            if opt_num == 0:
                query_project = dxpy.WORKSPACE_ID
            elif opt_num == 1:
                query_project = dxpy.find_one_project(name="Reference Genome Files", public=True, billed_to="org-dnanexus", level="VIEW")['id']
            elif opt_num == 2:
                project_generator = dxpy.find_projects(level='VIEW', describe=True, explicit_perms=True)
                print('\nProjects to choose from:')
                query_project = paginate_and_pick(project_generator, (lambda result: result['describe']['name']))['id']
            if opt_num in range(3):
                result_generator = dxpy.find_data_objects(classname=in_class,
                                                          typename=param_desc.get('type'),
                                                          describe=dict(fields=get_ls_l_desc_fields()),
                                                          project=query_project)
                print('\nAvailable data:')
                result_choice = paginate_and_pick(result_generator,
                                                  (lambda result: get_ls_l_desc(result['describe'])))
                if result_choice == 'none found':
                    print('No compatible data found')
                    continue
                elif result_choice == 'none picked':
                    continue
                else:
                    return [result_choice['project'] + ':' + result_choice['id']]
            elif opt_num == 3:
                # Select from previous jobs in current project
                result_generator = dxpy.find_jobs(project=dxpy.WORKSPACE_ID,
github counsyl / stor / stor / dx.py View on Github external
proj_id = self.canonical_project
        proj_name = self.virtual_project
        kwargs = {
            'project': proj_id,
            'name': pattern,
            'name_mode': 'glob',
            # the query performance is similar w/wo describe field,
            # hence no need to customize query based on canonicalize flag
            'describe': {'fields': {'name': True, 'folder': True}},
            'recurse': recurse,
            'classname': classname,
            'limit': limit,
            'folder': ('/' + (self.resource or '')) + (starts_with or '')
        }
        with _wrap_dx_calls():
            list_gen = dxpy.find_data_objects(**kwargs)
        for obj in list_gen:
            if canonicalize:
                yield DXCanonicalPath('dx://{}:/{}'.format(obj['project'], obj['id']))
            else:
                yield DXVirtualPath('{drive}{proj_name}:{folder}/{name}'.format(
                    drive=self.drive,
                    proj_name=proj_name,
                    folder=obj['describe']['folder'].rstrip('/'),
                    name=obj['describe']['name'])
                )
github dnanexus / dx-toolkit / src / python / dxpy / app_builder.py View on Github external
if override_folder:
        applet_spec['folder'] = override_folder
    if 'folder' not in applet_spec:
        applet_spec['folder'] = '/'

    if override_name:
        applet_spec['name'] = override_name

    if 'dxapi' not in applet_spec:
        applet_spec['dxapi'] = dxpy.API_VERSION

    archived_applet = None
    if check_name_collisions and not dry_run:
        destination_path = applet_spec['folder'] + ('/' if not applet_spec['folder'].endswith('/') else '') + applet_spec['name']
        logger.debug("Checking for existing applet at " + destination_path)
        for result in dxpy.find_data_objects(classname="applet", name=applet_spec["name"], folder=applet_spec['folder'], project=dest_project, recurse=False):
            if overwrite:
                logger.info("Deleting applet %s" % (result['id']))
                # TODO: test me
                dxpy.DXProject(dest_project).remove_objects([result['id']])
            elif archive:
                logger.debug("Archiving applet %s" % (result['id']))
                proj = dxpy.DXProject(dest_project)
                archive_folder = '/.Applet_archive'
                try:
                    proj.list_folder(archive_folder)
                except dxpy.DXAPIError:
                    proj.new_folder(archive_folder)

                proj.move(objects=[result['id']], destination=archive_folder)
                archived_applet = dxpy.DXApplet(result['id'], project=dest_project)
                now = datetime.datetime.fromtimestamp(archived_applet.created/1000).ctime()