How to use the dcicutils.ff_utils.expand_es_metadata function in dcicutils

To help you get started, we’ve selected a few examples of `dcicutils.ff_utils.expand_es_metadata`, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github 4dn-dcic / foursight / chalicelib / checks / helpers / wfr_utils.py View on Github external
def check_repli(res, my_auth, tag, check, start, lambda_limit, winsize=None):
    """Check run status for each set in res, and report missing runs and completed process.

    Args:
        res: iterable of set items; each one is indexed by 'uuid' and 'accession'.
        my_auth: authorization object handed to ff_utils.expand_es_metadata.
        tag, check, winsize: not used in the portion of the function shown here.
        start: datetime the run started. It is subtracted from datetime.utcnow(),
            so it is presumably a naive UTC datetime -- confirm with the caller.
        lambda_limit: time budget in seconds; iteration stops once it is exceeded.
    """
    for a_set in res:
        # get all related items
        all_items, all_uuids = ff_utils.expand_es_metadata(
            [a_set['uuid']], my_auth,
            store_frame='embedded',
            add_pc_wfr=True,
            ignore_field=['experiment_relation',
                          'biosample_relation',
                          'references',
                          'reference_pubs'])
        all_wfrs = all_items.get('workflow_run_awsem', []) + all_items.get('workflow_run_sbg', [])
        now = datetime.utcnow()
        # total_seconds() measures the whole interval; timedelta.seconds is only the
        # seconds component (0..86399) and wraps for negative or multi-day intervals.
        elapsed = int((now - start).total_seconds())
        print(a_set['accession'], elapsed)
        # the budget is checked after the fetch so the fetch time itself is counted
        if elapsed > lambda_limit:
            break
        # missing run
        missing_run = []
        # still running
        running = []
        # problematic cases
github 4dn-dcic / foursight / chalicelib / checks / helpers / wfr_utils.py View on Github external
def check_margi(res, my_auth, tag, check, start, lambda_limit, nore=False, nonorm=False):
    """Check run status for each set in res, and report missing runs and completed process.

    Args:
        res: iterable of set items; each one is indexed by 'uuid' and 'accession'.
        my_auth: authorization object handed to ff_utils.expand_es_metadata.
        tag, check, nore, nonorm: not used in the portion of the function shown here.
        start: datetime the run started. It is subtracted from datetime.utcnow(),
            so it is presumably a naive UTC datetime -- confirm with the caller.
        lambda_limit: time budget in seconds; iteration stops once it is exceeded.
    """
    for a_set in res:
        # get all related items
        all_items, all_uuids = ff_utils.expand_es_metadata(
            [a_set['uuid']], my_auth,
            store_frame='embedded',
            add_pc_wfr=True,
            ignore_field=['experiment_relation',
                          'biosample_relation',
                          'references',
                          'reference_pubs'])
        all_wfrs = all_items.get('workflow_run_awsem', []) + all_items.get('workflow_run_sbg', [])
        now = datetime.utcnow()
        # total_seconds() measures the whole interval; timedelta.seconds is only the
        # seconds component (0..86399) and wraps for negative or multi-day intervals.
        elapsed = int((now - start).total_seconds())
        print(a_set['accession'], elapsed, len(all_uuids))
        # the budget is checked after the fetch so the fetch time itself is counted
        if elapsed > lambda_limit:
            break
        # missing run
        missing_run = []
        # still running
        running = []
        # problematic cases
github 4dn-dcic / foursight / chalicelib / checks / wfr_cgap_checks.py View on Github external
all_samples = ff_utils.search_metadata(q, my_auth)
    print(len(all_samples))

    step1_name = 'workflow_bwa-mem_no_unzip-check'
    step2_name = 'workflow_add-readgroups-check'
    step3_name = 'workflow_merge-bam-check'
    step4_name = 'workflow_picard-MarkDuplicates-check'
    step5_name = 'workflow_sort-bam-check'
    step6_name = 'workflow_gatk-BaseRecalibrator'
    step7_name = 'workflow_gatk-ApplyBQSR-check'
    step8_name = 'workflow_gatk-HaplotypeCaller'
    # collect all wf for wf version check
    all_system_wfs = ff_utils.search_metadata('/search/?type=Workflow&status=released', my_auth)
    for a_sample in all_samples:
        all_items, all_uuids = ff_utils.expand_es_metadata([a_sample['uuid']], my_auth,
                                                           store_frame='embedded',
                                                           add_pc_wfr=True,
                                                           ignore_field=['previous_version',
                                                                         'experiment_relation',
                                                                         'biosample_relation',
                                                                         'references',
                                                                         'reference_pubs'])
        now = datetime.utcnow()
        print(a_sample['accession'], (now-start).seconds, len(all_uuids))
        if (now-start).seconds > lambda_limit:
            break
        # collect similar types of items under library
        all_wfrs = all_items.get('workflow_run_awsem', []) + all_items.get('workflow_run_sbg', [])
        file_items = [typ for typ in all_items if typ.startswith('file_') and typ != 'file_format']
        all_files = [i for typ in all_items for i in all_items[typ] if typ in file_items]
        all_qcs = [i for typ in all_items for i in all_items[typ] if typ.startswith('quality_metric')]
github 4dn-dcic / foursight / chalicelib / checks / helpers / wfr_utils.py View on Github external
def check_rna(res, my_auth, tag, check, start, lambda_limit):
    """Check run status for each set in res, and report missing runs and completed process.

    Args:
        res: iterable of set items; each one is indexed by 'uuid'.
        my_auth: authorization object handed to ff_utils.expand_es_metadata.
        tag, check: not used in the portion of the function shown here.
        start: datetime the run started. It is subtracted from datetime.utcnow(),
            so it is presumably a naive UTC datetime -- confirm with the caller.
        lambda_limit: time budget in seconds; iteration stops once it is exceeded.
    """
    for a_set in res:
        # get all related items
        all_items, all_uuids = ff_utils.expand_es_metadata(
            [a_set['uuid']], my_auth,
            store_frame='embedded',
            add_pc_wfr=True,
            ignore_field=['experiment_relation',
                          'biosample_relation',
                          'references',
                          'reference_pubs'])
        all_wfrs = all_items.get('workflow_run_awsem', []) + all_items.get('workflow_run_sbg', [])
        now = datetime.utcnow()
        # total_seconds() measures the whole interval; timedelta.seconds is only the
        # seconds component (0..86399) and wraps for negative or multi-day intervals.
        elapsed = int((now - start).total_seconds())
        # progress output is intentionally silenced for this check:
        # print(a_set['accession'], elapsed)
        # the budget is checked after the fetch so the fetch time itself is counted
        if elapsed > lambda_limit:
            break
        # missing run
        missing_run = []
        # still running
        running = []
        # problematic cases
github 4dn-dcic / foursight / chalicelib / checks / wfr_cgap_checks.py View on Github external
# check indexing queue
    env = connection.ff_env
    indexing_queue = ff_utils.stuff_in_queues(env, check_secondary=True)
    if indexing_queue:
        check.status = 'PASS'  # maybe use warn?
        check.brief_output = ['Waiting for indexing queue to clear']
        check.summary = 'Waiting for indexing queue to clear'
        check.full_output = {}
        return check

    q = '/search/?type=Sample&files.display_title=No+value&cram_files.display_title%21=No+value'
    all_samples = ff_utils.search_metadata(q, my_auth)
    print(len(all_samples))

    for a_sample in all_samples:
        all_items, all_uuids = ff_utils.expand_es_metadata([a_sample['uuid']], my_auth,
                                                           store_frame='embedded',
                                                           add_pc_wfr=True,
                                                           ignore_field=['experiment_relation',
                                                                         'biosample_relation',
                                                                         'references',
                                                                         'reference_pubs'])
        now = datetime.utcnow()
        print(a_sample['accession'], (now-start).seconds, len(all_uuids))
        if (now-start).seconds > lambda_limit:
            break
        # are all files uploaded ?
        all_uploaded = True
        cram_files = [i for i in all_items['file_processed'] if i['file_format']['file_format'] == 'CRAM']
        print(len(cram_files))
        for a_file in cram_files:
            if a_file['status'] in ['uploading', 'upload failed']:
github 4dn-dcic / foursight / chalicelib / checks / helpers / wfr_utils.py View on Github external
def check_hic(res, my_auth, tag, check, start, lambda_limit, nore=False, nonorm=False):
    """Check run status for each set in res, and report missing runs and completed process.

    Args:
        res: iterable of set items; each one is indexed by 'uuid' and 'accession'.
        my_auth: authorization object handed to ff_utils.expand_es_metadata.
        tag, check, nore, nonorm: not used in the portion of the function shown here.
        start: datetime the run started. It is subtracted from datetime.utcnow(),
            so it is presumably a naive UTC datetime -- confirm with the caller.
        lambda_limit: time budget in seconds; iteration stops once it is exceeded.
    """
    for a_set in res:
        # get all related items
        all_items, all_uuids = ff_utils.expand_es_metadata(
            [a_set['uuid']], my_auth,
            store_frame='embedded',
            add_pc_wfr=True,
            ignore_field=['experiment_relation',
                          'biosample_relation',
                          'references',
                          'reference_pubs'])
        all_wfrs = all_items.get('workflow_run_awsem', []) + all_items.get('workflow_run_sbg', [])
        now = datetime.utcnow()
        # total_seconds() measures the whole interval; timedelta.seconds is only the
        # seconds component (0..86399) and wraps for negative or multi-day intervals.
        elapsed = int((now - start).total_seconds())
        print(a_set['accession'], elapsed)
        # the budget is checked after the fetch so the fetch time itself is counted
        if elapsed > lambda_limit:
            break
        # missing run
        missing_run = []
        # still running
        running = []
        # problematic cases
github 4dn-dcic / foursight / chalicelib / checks / wfr_cgap_checks.py View on Github external
res = ff_utils.search_metadata(q, my_auth)
    # check if anything in scope
    if not res:
        return check
    # list step names
    step1_name = 'workflow_gatk-CombineGVCFs'
    step2_name = 'workflow_gatk-GenotypeGVCFs-check'
    # step3_name = 'workflow_gatk-VQSR-check'

    # collect all wf for wf version check
    all_system_wfs = ff_utils.search_metadata('/search/?type=Workflow&status=released', my_auth)

    # iterate over msa
    print(len(res))
    for an_msa in res:
        all_items, all_uuids = ff_utils.expand_es_metadata([an_msa['uuid']], my_auth,
                                                           store_frame='embedded',
                                                           add_pc_wfr=True,
                                                           ignore_field=['previous_version',
                                                                         'experiment_relation',
                                                                         'biosample_relation',
                                                                         'references',
                                                                         'reference_pubs'])
        now = datetime.utcnow()
        print(an_msa['@id'], (now-start).seconds, len(all_uuids))
        if (now-start).seconds > lambda_limit:
            break

        all_wfrs = all_items.get('workflow_run_awsem', []) + all_items.get('workflow_run_sbg', [])
        file_items = [typ for typ in all_items if typ.startswith('file_') and typ != 'file_format']
        all_files = [i for typ in all_items for i in all_items[typ] if typ in file_items]
        all_qcs = [i for typ in all_items for i in all_items[typ] if typ.startswith('quality_metric')]