How to use the dcicutils.ff_utils module in dcicutils

To help you get started, we’ve selected a few dcicutils examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github 4dn-dcic / foursight / chalicelib / checks / wrangler_checks.py View on Github external
# NOTE(review): fragment of a foursight wrangler check — the enclosing function
# definition is not visible in this snippet, so names such as check, b_meta,
# fulloutput, bid, pid, b2p, get_transfer_fields, kwargs and connection come
# from missing context.
check.status = "FAIL"
                check.description = "Problem retrieving metadata for input data - " + b_meta
                return check
            # record the newly found publication id for this biorxiv uuid
            fulloutput['biorxivs2check'].setdefault(bid, {}).update({'new_pub_ids': [pid]})
            if b_meta.get('url'):
                # setdefault keeps a previously stored link if one already exists
                fulloutput['biorxivs2check'][bid].setdefault('blink', b_meta.get('url'))
            fulloutput['biorxivs2check'][bid].setdefault('data2transfer', {}).update(get_transfer_fields(b_meta))
        fndcnt = len(b2p)
    search = 'search/?'
    if kwargs.get('uuid_list'):
        # an explicit comma-separated uuid list overrides the default biorxiv search
        suffix = '&'.join(['uuid={}'.format(u) for u in [uid.strip() for uid in kwargs.get('uuid_list').split(',')]])
    else:
        suffix = 'journal=bioRxiv&type=Publication&status=current&limit=all'
    # run the check
    search_query = search + suffix
    biorxivs = ff_utils.search_metadata(search_query, key=connection.ff_keys)
    if not biorxivs and not fndcnt:
        check.status = "FAIL"
        check.description = "Could not retrieve biorxiv records from fourfront"
        return check

    # here is where we get any previous or current false positives
    last_result = check.get_primary_result()
    # if last one was fail, find an earlier check with non-FAIL status
    it = 0
    while last_result['status'] == 'ERROR' or not last_result['kwargs'].get('primary'):
        it += 1
        # this is a daily check, so look for checks with 12h iteration
        hours = it * 12
        last_result = check.get_closest_result(diff_hours=hours)
        # if this is going forever kill it
        # NOTE(review): the body of this guard falls outside the visible snippet
        if hours > 100:
github 4dn-dcic / foursight / chalicelib / checks / helpers / google_utils.py View on Github external
def get_latest_tracking_item_date(self, increment="daily"):
    """
    Return the date of the most recent TrackingItem for *increment*.

    Queries '/search/?type=TrackingItem&sort=-google_analytics.for_date&&google_analytics.date_increment=...'
    to get date of last TrackingItem for increment in database.  Returns a
    datetime.date, or None when no matching TrackingItem exists.

    Raises IndexError when increment is not 'daily' or 'monthly'.

    TODO: Accept yearly once we want to collect & viz it.
    """
    if increment not in ('daily', 'monthly'):
        raise IndexError("increment parameter must be one of 'daily', 'monthly'")

    query = (
        '/search/?type=TrackingItem&tracking_type=google_analytics'
        '&sort=-google_analytics.for_date&limit=1'
        '&google_analytics.date_increment=' + increment
    )
    hits = ff_utils.search_metadata(
        query,
        key=dict(self.owner.access_key, server=self.owner.server)
    )
    if not hits:
        return None

    iso_date = hits[0]['google_analytics']['for_date']

    # TODO: Use date.fromisoformat() once we're on Python 3.7
    # In python, months are indexed from 1 <= month <= 12, not 0 <= month <= 11 like in JS.
    year, month, day = iso_date.split('-', 2)
    return date(int(year), int(month), int(day))
github 4dn-dcic / foursight / chalicelib / app_utils.py View on Github external
# NOTE(review): fragment of an authorization helper — the enclosing function
# definition (providing request_dict and env) is outside this snippet.
# this looks bad but isn't because request authentication will
    # still fail if local keys are not configured
    src_ip = request_dict.get('context', {}).get('identity', {}).get('sourceIp', '')
    if src_ip == '127.0.0.1':
        # local requests bypass JWT verification entirely
        return True
    token = get_jwt(request_dict)
    auth0_client = os.environ.get('CLIENT_ID', None)
    auth0_secret = os.environ.get('CLIENT_SECRET', None)
    if auth0_client and auth0_secret and token:
        try:
            if env is None:
                return False  # we have no env to check auth
            # leeway accounts for clock drift between us and auth0
            # b64decode altchars '-_' decodes the urlsafe-base64 Auth0 secret
            payload = jwt.decode(token, b64decode(auth0_secret, '-_'), audience=auth0_client, leeway=30)
            for env_info in init_environments(env).values():
                user_res = ff_utils.get_metadata('users/' + payload.get('email').lower(),
                                            ff_env=env_info['ff_env'], add_on='frame=object')
                # the user must be an admin with a verified email on EVERY environment
                if not ('admin' in user_res['groups'] and payload.get('email_verified')):
                    # if unauthorized for one, unauthorized for all
                    return False
            return True
        except:
            # NOTE(review): bare except silently swallows everything (including
            # jwt decode errors) and falls through to return False — consider
            # narrowing to the expected exception types and logging.
            pass
    return False
github 4dn-dcic / Submit4DN / wranglertools / import_data.py View on Github external
def _verify_and_return_item(item, connection):
    """Look up *item* on the portal and return its frame=object metadata.

    Returns None when the response lacks an '@id' field or when the lookup
    raises TypeError/AssertionError; any other exception propagates.
    """
    try:
        metadata = ff_utils.get_metadata(item, key=connection.key, add_on='frame=object')
        if '@id' not in metadata:
            return None
    except (AssertionError, TypeError):
        return None
    return metadata
github 4dn-dcic / foursight / chalicelib / checks / wrangler_checks.py View on Github external
def patch_wfr_and_log(wfr, full_output):
    """Soft-delete one workflow run and record the outcome in *full_output*.

    Patches the wfr's status to 'deleted' via ff_utils (using `connection`
    from the enclosing scope), then appends the uuid to
    full_output['success'] on success, or a '<uuid>. <error>' string to
    full_output['failure'] on any exception.  Runs already recorded as
    successes are skipped.
    """
    uuid = wfr['uuid']
    patch_json = {'uuid': uuid, 'status': 'deleted'}
    # no need to patch again
    if uuid in full_output['success']:
        return
    try:
        ff_utils.patch_metadata(patch_json, uuid, key=connection.ff_keys)
    except Exception as exc:
        # log something about str(exc)
        full_output['failure'].append('%s. %s' % (uuid, str(exc)))
    else:
        # successful patch
        full_output['success'].append(uuid)
github 4dn-dcic / foursight / chalicelib / checks / qc_checks.py View on Github external
# NOTE(review): fragment of a foursight QC check — the enclosing function
# definition is not visible here; fileformat, t0, time_limit, kwargs and
# connection come from missing context.
check = CheckResult(connection, 'identify_files_without_qc_summary_bb')
    # must set this to be the function name of the action
    check.action = 'patch_quality_metric_summary_bb'
    default_filetype = 'FileProcessed'  # skip fastq
    default_stati = 'released%20to%20project&status=released&status=uploaded&status=pre-release'
    filetype = kwargs.get('file_type') or default_filetype
    stati = 'status=' + (kwargs.get('status') or default_stati)
    search_query = 'search/?type={}&{}&frame=object'.format(filetype, stati)
    search_query += '&file_format.file_format=' + fileformat
    addon = kwargs.get('search_add_on')
    if addon is not None:
        # normalize the user-supplied addon so it joins the query cleanly
        if not addon.startswith('&'):
            addon = '&' + addon
        search_query += addon
    problem_files = []
    file_hits = ff_utils.search_metadata(search_query, key=connection.ff_keys, page_limit=200)
    for hit in file_hits:
        # bail out once the (externally defined) time budget is exhausted
        if round(time.time() - t0, 2) > time_limit:
            break
        # flag files that have a quality_metric but no quality_metric_summary
        if hit.get('quality_metric') and not hit.get('quality_metric_summary', ''):
            hit_dict = {
                'accession': hit.get('accession'),
                'uuid': hit.get('uuid'),
                '@type': hit.get('@type'),
                'upload_key': hit.get('upload_key'),
                'file_format': hit.get('file_format'),
                'quality_metric': hit.get('quality_metric')
            }
            problem_files.append(hit_dict)
    check.summary = '{} files with no quality metric summary'.format(len(problem_files))
    check.full_output = problem_files
    # NOTE(review): the body of this guard falls outside the visible snippet
    if problem_files:
github 4dn-dcic / foursight / chalicelib / checks / header_checks.py View on Github external
def find_items_for_header_processing(connection, check, header, add_search=None,
                                     remove_search=None, append=True):
    """
    Find items that should have the given static header attached
    (add_search) and remove them from others (remove_search).
    Args are:
    - connection (FS connection)
    - check (required; check object initialized by CheckResult)
    - headers @id (required)
    - add_search search query
    - remove_search search query
    - append (default True): place the header after an item's existing
      static_headers instead of before them
    Meant to be used for CHECKS
    """
    # sets the full_output of the check!
    check.full_output = {'static_section': header, 'to_add': {}, 'to_remove': {}}
    # this GET will fail if the static header does not exist
    header_res = ff_utils.get_metadata(header, key=connection.ff_keys)
    # add entries keyed by item uuid with value of the static headers
    if add_search:
        search_res_add = ff_utils.search_metadata(add_search, key=connection.ff_keys)
        for search_res in search_res_add:
            curr_headers = search_res.get('static_headers', [])
            # handle case where frame != object
            if curr_headers and isinstance(curr_headers[0], dict):
                curr_headers = [obj['@id'] for obj in curr_headers]
            # only schedule an add if the header is not already present
            if header not in curr_headers:
                curr_headers = curr_headers + [header] if append else [header] + curr_headers
                check.full_output['to_add'][search_res['@id']] = curr_headers

    if remove_search:
        search_res_remove = ff_utils.search_metadata(remove_search,
                                                     key=connection.ff_keys)
        # NOTE(review): the body of this loop falls outside the visible snippet
        for search_res in search_res_remove:
github 4dn-dcic / foursight / chalicelib / checks / badge_checks.py View on Github external
def compare_badges(obj_ids, item_type, badge, ff_keys):
    '''
    Compares items that should have a given badge to items that do have the given badge.
    Used for badges that utilize a single message choice.
    Input (first argument) should be a list of item @ids.

    Args:
    - obj_ids: list of @ids of items that SHOULD have the badge
    - item_type: portal item type to search for badged items
    - badge: badge name (trailing component of the badge's @id)
    - ff_keys: fourfront auth keys for the ff_utils search

    Returns a 3-tuple:
    - needs_badge: @ids (in obj_ids order) that lack the badge
    - remove_badge: {@id: remaining-badges-list} for items to strip the badge from
    - badge_ok: @ids that correctly carry the badge already
    '''
    search_url = 'search/?type={}&badges.badge.@id=/badges/{}/'.format(item_type, badge)
    has_badge = ff_utils.search_metadata(search_url + '&frame=object', key=ff_keys)
    # sets give O(1) membership tests; the returned lists keep their original
    # contents and ordering, so callers see identical results
    obj_id_set = set(obj_ids)
    needs_badge = []
    badge_ok = []
    remove_badge = {}
    for item in has_badge:
        if item['@id'] in obj_id_set:
            # handle differences in badge messages
            badge_ok.append(item['@id'])
        else:
            # keep every badge on the item except the one being removed
            keep = [badge_dict for badge_dict in item['badges'] if badge not in badge_dict['badge']]
            remove_badge[item['@id']] = keep
    badge_ok_set = set(badge_ok)
    for other_item in obj_ids:
        if other_item not in badge_ok_set:
            needs_badge.append(other_item)
    return needs_badge, remove_badge, badge_ok