How to use the gsutil.get_local_file_mtime_in_utc function in gsutil

To help you get started, we’ve selected a few gsutil examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mitodl / edx2bigquery / edx2bigquery / make_combined_person_course.py — View on GitHub (external)
# NOTE(review): scraped snippet — the enclosing function def is not visible in
# this excerpt, and the first line below lost its leading indentation during
# extraction. Python 2 (print statements).
# Purpose: for each course in course_id_set, download person_course.csv.gz from
# its Google Cloud Storage path into outdir, skipping files already up to date.
ofnset = []
    cnt = 0
    for course_id in course_id_set:
        # GCS path for this course's data files
        gb = gsutil.gs_path_from_course_id(course_id, use_dataset_latest=use_dataset_latest)
        # local target name: slashes in the course id become double underscores
        ofn = outdir / ('person_course_%s.csv.gz' % (course_id.replace('/', '__')))
        ofnset.append(ofn)

        # nskip>0 means "trust any existing local file" — skip without a date check
        if (nskip>0) and ofn.exists():
            print "%s already exists, not downloading" % ofn
            sys.stdout.flush()
            continue

        if ofn.exists():
            # compare the local file's mtime (in UTC) against the GCS object's
            # date to decide whether a re-download is needed
            fnset = gsutil.get_gs_file_list(gb)
            local_dt = gsutil.get_local_file_mtime_in_utc(ofn)
            fnb = 'person_course.csv.gz'
            if not fnb in fnset:
                # expected object is absent from the bucket — skip this course
                print "%s/%s missing!  skipping %s" % (gb, fnb, course_id)
                continue
            if (fnb in fnset) and (local_dt >= fnset[fnb]['date']):
                # local copy is as new or newer than the GCS copy
                print "%s already exists with date %s (gs file date %s), not re-downloading" % (ofn, local_dt, fnset[fnb]['date'])
                sys.stdout.flush()
                continue
            else:
                print "%s already exists but has date %s (gs file date %s), so re-downloading" % (ofn, local_dt, fnset[fnb]['date'])
                sys.stdout.flush()

        # shell out to the gsutil CLI to fetch the file
        cmd = 'gsutil cp %s/person_course.csv.gz %s' % (gb, ofn)
        print "Retrieving %s via %s" % (course_id, cmd)
        sys.stdout.flush()
        os.system(cmd)
github mitodl / edx2bigquery / edx2bigquery / make_problem_analysis.py — View on GitHub (external)
# NOTE(review): scraped snippet — the enclosing function def is outside this
# excerpt, the first line below lost its leading indentation in extraction,
# and the final try: is truncated mid-statement. Python 2 (print statements).
# Purpose: load the problem_analysis schema, open the course's studentmodule
# CSV, and compare its mtime against the BigQuery table's last-modified time
# to decide whether recomputation is needed.
SCHEMA_FILE = '%s/schemas/schema_problem_analysis.json' % mypath
    the_schema = json.loads(open(SCHEMA_FILE).read())['problem_analysis']
    the_dict_schema = schema2dict(the_schema)

    smfn = lfp / 'studentmodule.csv'
    smfp = openfile(smfn)
    if smfp is None:
        # source data missing for this course — nothing to process
        print "--> [analyze_problems] oops, missing %s, cannot process course %s" % (smfn, course_id)
        return

    print "[analyze_problems] processing %s for course %s to create problem_analysis table" % (smfn, course_id)
    sys.stdout.flush()

    # openfile may have resolved a gzipped variant; record the actual name used
    if smfp.name.endswith('.gz'):
        smfn += '.gz'
    # mtime as tz-unaware UTC so it can be compared to BigQuery's datetime
    sm_moddate = gsutil.get_local_file_mtime_in_utc(smfn, make_tz_unaware=True)

    dataset = bqutil.course_id2dataset(course_id, use_dataset_latest=use_dataset_latest)
    table = 'problem_analysis'

    # if table already exists, then check its modification time to see if it's older
    if not force_recompute:
        try:
            table_moddate = bqutil.get_bq_table_last_modified_datetime(dataset, table)
        except Exception as err:
            # a missing table is expected on first run; re-raise anything else
            if "Not Found" in str(err):
                table_moddate = None
            else:
                raise
        
        if table_moddate is not None:
            try:
github mitodl / edx2bigquery / edx2bigquery / main.py — View on GitHub (external)
# NOTE(review): scraped snippet — part of a larger loop over tables; the
# enclosing def and loop header are outside this excerpt, and the first line
# below lost its leading indentation in extraction. Python 2 (print statements).
# Purpose: export a BigQuery table to a local file, optionally tagging rows
# with course_id, dumping only the schema, or skipping up-to-date files.
print "Retrieving %s as %s" % (table, ofn)
        if args.add_courseid and course_id_by_table:
            # tag every exported row with its course_id
            optargs['extra_fields'] = {'course_id': course_id_by_table[table]}
            print "--> Adding %s for %s to each row" % (course_id_by_table[table], 'course_id')
        sys.stdout.flush()

        if args.just_get_schema:
            # schema-only mode: write the field list as JSON and move on
            tinfo = bqutil.get_bq_table_info(dataset, tablename, **optargs)
            ofn = '%s__%s__schema.json' % (dataset, tablename)
            print "Saving schema file as %s" % ofn
            open(ofn, 'w').write(json.dumps(tinfo['schema']['fields'], indent=4))
            continue

        if args.only_if_newer and os.path.exists(ofn):
            # skip the download when the local file is at least as new as the
            # table's last-modified time (both compared as tz-unaware UTC)
            mod_dt = bqutil.get_bq_table_last_modified_datetime(dataset, tablename)
            of_dt = gsutil.get_local_file_mtime_in_utc(ofn,make_tz_unaware=True)
            if (mod_dt < of_dt):
                print "--> only_if_newer specified, and table %s mt=%s, file mt=%s, so skipping" % (tablename,
                                                                                                    mod_dt,
                                                                                                    of_dt)
                continue

        try:
            bqdat = bqutil.get_table_data(dataset, tablename,
                                          convert_timestamps=True,
                                          return_csv=(out_fmt=='csv'), **optargs)
        except Exception as err:
            # tolerate missing tables only when --skip-missing was requested
            if args.skip_missing and 'HttpError 404' in str(err):
                print "--> missing table [%s.%s] Skipping..." % (dataset, tablename)
                sys.stdout.flush()
                continue
            raise