How to use the dcicutils.ff_utils.get_es_metadata function in dcicutils

To help you get started, we’ve selected a few examples of dcicutils.ff_utils.get_es_metadata, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github 4dn-dcic / foursight / chalicelib / checks / cgap_wrangler_checks.py View on Github external
id2item = {}
    stati2search = [s for s in STATUS_LEVEL.keys() if STATUS_LEVEL.get(s) >= 4]
    items2search = ['Case']
    item_search = 'search/?frame=object'
    for item in items2search:
        item_search += '&type={}'.format(item)
    for status in stati2search:
        item_search += '&status={}'.format(status)

    if id_list:
        itemids = re.split(',|\s+', id_list)
        itemids = [id for id in itemids if id]
    else:
        itemres = ff_utils.search_metadata(item_search, key=connection.ff_keys, page_limit=500)
        itemids = [item.get('uuid') for item in itemres]
    es_items = ff_utils.get_es_metadata(itemids, key=connection.ff_keys, chunk_size=200, is_generator=True)
    for es_item in es_items:
        label = es_item.get('object').get('display_title')
        desc = es_item.get('object').get('description')
        inst = es_item.get('embedded').get('institution').get('display_title')
        status = es_item.get('properties').get('status', 'in review')
        id2links[es_item.get('uuid')] = [li.get('uuid') for li in es_item.get('linked_uuids_embedded')]
        id2status[es_item.get('uuid')] = STATUS_LEVEL.get(status)
        id2item[es_item.get('uuid')] = {'label': label, 'status': status, 'institution': inst,
                                        'description': desc}

    mismatches = {}
    linked2get = {}
    for i, iid in enumerate(itemids):
        linkedids = id2links.get(iid)
        if not linkedids:  # item with no link
            continue
github 4dn-dcic / foursight / chalicelib / checks / wrangler_checks.py View on Github external
elif res.get('experiment_sets'):
            if len(res['experiment_sets']) != 1:  # this should not happen
                opf['problematic'].append({
                    '@id': res['@id'],
                    'experiment_sets': [es['uuid'] for es in res['experiment_sets']]})
                continue
            exp_or_set = res['experiment_sets'][0]
        else:  # this should not happen
            opf['problematic'].append({'@id': res['@id']})
            continue
        res['exp_set_uuid'] = exp_or_set['uuid']
        if res['exp_set_uuid'] not in exp_set_uuids:
            exp_set_uuids.append(res['exp_set_uuid'])

    # get lab of Exp/ExpSet
    result_exp_set = ff_utils.get_es_metadata(exp_set_uuids, sources=['uuid', 'properties.lab'], key=connection.ff_keys)
    uuid_2_lab = {}  # map file uuid to Exp/Set lab
    for item in result_exp_set:
        uuid_2_lab[item['uuid']] = item['properties']['lab']

    # evaluate contributing lab
    for res in result:
        if res['@id'] not in [pr['@id'] for pr in opf['problematic']]:
            contr_lab = []
            exp_set_lab = uuid_2_lab[res['exp_set_uuid']]
            if exp_set_lab == res['lab']['uuid']:
                continue
            elif res.get('contributing_labs'):
                contr_lab = [lab['uuid'] for lab in res['contributing_labs']]
                if exp_set_lab in contr_lab:
                    continue
            contr_lab.append(exp_set_lab)
github 4dn-dcic / foursight / chalicelib / checks / wrangler_checks.py View on Github external
elif res.get('experiment_sets'):
            if len(res['experiment_sets']) != 1:  # this should not happen
                opf['problematic'].append({
                    '@id': res['@id'],
                    'experiment_sets': [es['uuid'] for es in res['experiment_sets']]})
                continue
            exp_or_set = res['experiment_sets'][0]
        else:  # this should not happen
            opf['problematic'].append({'@id': res['@id']})
            continue
        res['exp_set_uuid'] = exp_or_set['uuid']
        if res['exp_set_uuid'] not in exp_set_uuids:
            exp_set_uuids.append(res['exp_set_uuid'])

    # get lab of Exp/ExpSet
    result_exp_set = ff_utils.get_es_metadata(exp_set_uuids, sources=['uuid', 'properties.lab'], key=connection.ff_keys)
    uuid_2_lab = {}  # map file uuid to Exp/Set lab
    for item in result_exp_set:
        uuid_2_lab[item['uuid']] = item['properties']['lab']

    # evaluate contributing lab
    for res in result:
        if res['@id'] not in [pr['@id'] for pr in opf['problematic']]:
            contr_lab = []
            exp_set_lab = uuid_2_lab[res['exp_set_uuid']]
            if exp_set_lab == res['lab']['uuid']:
                continue
            elif res.get('contributing_labs'):
                contr_lab = [lab['uuid'] for lab in res['contributing_labs']]
                if exp_set_lab in contr_lab:
                    continue
            contr_lab.append(exp_set_lab)
github 4dn-dcic / foursight / chalicelib / checks / audit_checks.py View on Github external
if not linkedids:  # item with no link
            continue
        istatus = id2status.get(iid)
        for lid in linkedids:
            lstatus = id2status.get(lid)
            if not lstatus:  # add to list to get
                linked2get.setdefault(lid, []).append(iid)
            elif lstatus < istatus:  # status mismatch for an item we've seen before
                ignore = id2item.get(iid).get('to_ignore')
                if ignore is not None and lid in ignore:
                    continue
                else:
                    mismatches.setdefault(iid, []).append(lid)

        if len(linked2get) > MIN_CHUNK_SIZE or i + 1 == len(itemids):  # only query es when we have more than a set number of ids (500)
            linked2chk = ff_utils.get_es_metadata(list(linked2get.keys()), key=connection.ff_keys,
                                                  chunk_size=200, is_generator=True)
            for litem in linked2chk:
                luuid = litem.get('uuid')
                listatus = litem.get('properties').get('status', 'in review by lab')
                llabel = litem.get('item_type')
                lstatus = STATUS_LEVEL.get(listatus)
                # add info to tracking dict
                id2status[luuid] = lstatus
                id2item[luuid] = {'label': llabel, 'status': listatus}
                for lfid in set(linked2get[luuid]):
                    # check to see if the linked item is something to ignore for that item
                    ignore = id2item[lfid].get('to_ignore')
                    if ignore is not None and luuid in ignore:
                        continue
                    elif lstatus < id2status[lfid]:  # status mismatch so add to report
                        mismatches.setdefault(lfid, []).append(luuid)
github 4dn-dcic / foursight / chalicelib / checks / audit_checks.py View on Github external
borgns = [gene2org.get(g.get('@id')) for g in biogenes if '@id' in g]
            linked_orgn_name = _get_orgname_from_atid_list(borgns, orgn2name)
        if not linked_orgn_name:  # didn't get it from genes - try genomic regions
            gen_regions = biofeat.get('genome_location')
            if gen_regions is not None:
                grorgns = []
                for genreg in gen_regions:
                    assembly_in_dt = False
                    gr_dt = genreg.get('display_title')
                    for ga, orgn in genome2orgn.items():
                        if ga in gr_dt:
                            grorgns.append(orgn)
                            assembly_in_dt = True
                            break
                    if not assembly_in_dt:
                        gr_res = ff_utils.get_es_metadata([genreg.get('uuid')],
                                                          key=connection.ff_keys, sources=['properties.genome_assembly'])
                        try:
                            gr_ass = gr_res[0].get('properties').get('genome_assembly')
                        except AttributeError:
                            gr_ass = None
                        if gr_ass is not None:
                            for ga, orgn in genome2orgn.items():
                                if ga == gr_ass:
                                    grorgns.append(orgn)
                linked_orgn_name = _get_orgname_from_atid_list(grorgns, orgn2name)
        if not linked_orgn_name:  # and finally try Description
            desc = biofeat.get('description')
            if desc is not None:
                for o in orgn2name.values():
                    if o in desc.lower():
                        linked_orgn_name = o
github 4dn-dcic / foursight / chalicelib / checks / system_checks.py View on Github external
#         return check

    if get_stage_info()['stage'] != 'prod':
        check.summary = check.description = 'This check only runs on Foursight prod'
        return check

    time_limit = 270  # 4.5 minutes
    t0 = time.time()
    check.full_output = {}  # purged items by item type
    search = '/search/?type=TrackingItem&tracking_type=download_tracking&status=deleted&field=uuid&limit=300'
    search_res = ff_utils.search_metadata(search, key=connection.ff_keys)
    search_uuids = [res['uuid'] for res in search_res]
    client = es_utils.create_es_client(connection.ff_es, True)
    # a bit convoluted, but we want the frame=raw, which does not include uuid
    # use get_es_metadata to handle this. Use it as a generator
    for to_purge in ff_utils.get_es_metadata(search_uuids, es_client=client, is_generator=True,
                                             key=connection.ff_keys):
        if round(time.time() - t0, 2) > time_limit:
            break
        purge_properties = to_purge['properties']
        purge_properties['uuid'] = to_purge['uuid']  # add uuid to frame=raw
        try:
            purge_res = ff_utils.purge_metadata(to_purge['uuid'], key=connection.ff_keys)
        except Exception as exc:
            purge_status = 'error'
            purge_detail = str(exc)
        else:
            purge_status = purge_res['status']
            purge_detail = purge_properties if purge_status == 'success' else purge_res
        purge_record = {'uuid': to_purge['uuid'], 'result': purge_detail}
        if to_purge['item_type'] not in check.full_output:
            check.full_output[to_purge['item_type']] = {}
github 4dn-dcic / foursight / chalicelib / checks / audit_checks.py View on Github external
opf_exp_results = ff_utils.search_metadata(opf_exp, key=connection.ff_keys)
    results = opf_set_results + opf_exp_results
    # extract file uuids
    files = []
    for result in results:
        if result.get('other_processed_files'):
            for case in result['other_processed_files']:
                files.extend([i['uuid'] for i in case['files']])
                if case.get('higlass_view_config'):
                    files.append(case['higlass_view_config'].get('uuid'))
        if result.get('experiments_in_set'):
            for exp in result['experiments_in_set']:
                for case in exp['other_processed_files']:
                    files.extend([i['uuid'] for i in case['files']])
    # get metadata for files, to collect status
    resp = ff_utils.get_es_metadata(list(set(files)),
                                    sources=['links.quality_metric', 'object.status', 'uuid'],
                                    key=connection.ff_keys)
    opf_status_dict = {item['uuid']: item['object']['status'] for item in resp if item['uuid'] in files}
    opf_linked_dict = {
        item['uuid']: item.get('links', {}).get('quality_metric', []) for item in resp if item['uuid'] in files
    }
    quality_metrics = [uuid for item in resp for uuid in item.get('links', {}).get('quality_metric', [])]
    qm_resp = ff_utils.get_es_metadata(list(set(quality_metrics)),
                                       sources=['uuid', 'object.status'],
                                       key=connection.ff_keys)
    opf_other_dict = {item['uuid']: item['object']['status'] for item in qm_resp if item not in files}
    check.full_output = {}
    for result in results:
        hg_dict = {item['title']: item.get('higlass_view_config', {}).get('uuid')
                   for item in result.get('other_processed_files', [])}
        titles = [item['title'] for item in result.get('other_processed_files', [])]
github 4dn-dcic / foursight / chalicelib / checks / cgap_wrangler_checks.py View on Github external
mismatches = {}
    linked2get = {}
    for i, iid in enumerate(itemids):
        linkedids = id2links.get(iid)
        if not linkedids:  # item with no link
            continue
        istatus = id2status.get(iid)
        for lid in linkedids:
            lstatus = id2status.get(lid)
            if not lstatus:  # add to list to get
                linked2get.setdefault(lid, []).append(iid)
            elif lstatus < istatus:  # status mismatch for an item we've seen before
                mismatches.setdefault(iid, []).append(lid)

        if len(linked2get) > MIN_CHUNK_SIZE or i + 1 == len(itemids):  # only query es when we have more than a set number of ids (500)
            linked2chk = ff_utils.get_es_metadata(list(linked2get.keys()), key=connection.ff_keys,
                                                  chunk_size=200, is_generator=True)
            for litem in linked2chk:
                luuid = litem.get('uuid')
                listatus = litem.get('properties').get('status', 'in review')
                llabel = litem.get('item_type')
                lstatus = STATUS_LEVEL.get(listatus)
                # add info to tracking dict
                id2status[luuid] = lstatus
                id2item[luuid] = {'label': llabel, 'status': listatus}
                for lfid in set(linked2get[luuid]):
                    if lstatus < id2status[lfid]:  # status mismatch so add to report
                        mismatches.setdefault(lfid, []).append(luuid)
            linked2get = {}  # reset the linked id dict
    if mismatches:
        brief_output = {}
        full_output = {}