How to use gsutil - 10 common examples

To help you get started, we’ve selected a few gsutil examples based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github web-platform-tests / wpt.fyi / results-processor / processor.py View on Github external
def upload_split(self):
        """Uploads the individual results recursively to GCS."""
        self.report.populate_upload_directory(output_dir=self._upload_dir)

        # Step 1: upload [ID]-summary.json.gz as
        # gs://wptd/[SHA]/[ID]-summary.json.gz.
        summary_path = os.path.join(
            self._upload_dir, self.report.sha_summary_path)
        gsutil.copy(summary_path, self.results_gs_url, gzipped=True)

        # Step 2: recursively upload the per-test results, but only when the
        # report actually produced any (an empty report creates no directory).
        results_dir = os.path.join(
            self._upload_dir, self.report.sha_product_path)
        if not os.path.exists(results_dir):
            return
        # Step 1 guarantees gs://wptd/[SHA] exists, so copying foo into
        # gs://wptd/[SHA] creates gs://wptd/[SHA]/foo according to
        # `gsutil cp --help`.
        parent_gs_url = self.results_gs_url[:self.results_gs_url.rfind('/')]
        gsutil.copy(results_dir, parent_gs_url, gzipped=True)
github web-platform-tests / wpt.fyi / results-processor / processor.py View on Github external
# 1. Copy [ID]-summary.json.gz to gs://wptd/[SHA]/[ID]-summary.json.gz.
        gsutil.copy(
            os.path.join(self._upload_dir, self.report.sha_summary_path),
            self.results_gs_url,
            gzipped=True)

        # 2. Copy the individual results recursively if there is any (i.e. if
        # the report is not empty).
        results_dir = os.path.join(
            self._upload_dir, self.report.sha_product_path)
        if os.path.exists(results_dir):
            # gs://wptd/[SHA] is guaranteed to exist after 1, so copying foo to
            # gs://wptd/[SHA] will create gs://wptd/[SHA]/foo according to
            # `gsutil cp --help`.
            gsutil.copy(
                results_dir,
                self.results_gs_url[:self.results_gs_url.rfind('/')],
                gzipped=True)
github web-platform-tests / wpt.fyi / results-processor / processor.py View on Github external
def _download_gcs(self, gcs):
        """Downloads a gs:// object into a local temp file; returns its path."""
        assert gcs.startswith('gs://')
        suffix = self.known_extension(gcs)
        handle, local_path = tempfile.mkstemp(suffix=suffix, dir=self._temp_dir)
        # mkstemp returns an open descriptor we never write through; close it
        # right away so gsutil owns the file exclusively.
        os.close(handle)
        # No extra logging here: gsutil will log itself.
        gsutil.copy(gcs, local_path)
        return local_path
github web-platform-tests / wpt.fyi / results-processor / processor.py View on Github external
def upload_raw(self):
        """Uploads the merged raw JSON report to GCS."""
        # The temp file only needs to live long enough for gsutil to read it,
        # so the context manager's delete-on-close is exactly what we want.
        with tempfile.NamedTemporaryFile(
                suffix='.json.gz', dir=self._temp_dir) as gz_file:
            self.report.serialize_gzip(gz_file.name)
            gsutil.copy(gz_file.name, self.raw_results_gs_url, gzipped=True)
github web-platform-tests / wpt.fyi / results-processor / processor.py View on Github external
def results_url(self):
        """Returns the public HTTP URL for the split results' GCS location."""
        gs_url = self.results_gs_url
        return gsutil.gs_to_public_url(gs_url)
github web-platform-tests / wpt.fyi / results-processor / processor.py View on Github external
def raw_results_url(self):
        """Returns the public HTTP URL for the raw report's GCS location."""
        gs_url = self.raw_results_gs_url
        return gsutil.gs_to_public_url(gs_url)
github GoogleCloudPlatform / gsutil / test / gsutil_measure_imports.py View on Github external
def initialize():
  """Initializes gsutil."""
  # Make the repository root importable so `import gsutil` resolves from the
  # test directory.
  repo_root = os.path.abspath(os.path.join(sys.path[0], '..'))
  sys.path.insert(0, repo_root)
  import gsutil  # pylint: disable=g-import-not-at-top
  # Dump the collected timing table once the interpreter exits.
  atexit.register(print_sorted_initialization_times)
  gsutil.MEASURING_TIME_ACTIVE = True
  gsutil.RunMain()
github GoogleCloudPlatform / gsutil / test / gsutil_measure_imports.py View on Github external
def initialize():
  """Initializes gsutil."""
  # Prepend the repository root to the import path so the top-level gsutil
  # module can be imported below.
  sys.path.insert(0, os.path.abspath(os.path.join(sys.path[0], '..')))
  import gsutil  # pylint: disable=g-import-not-at-top
  # Print the recorded import/initialization timings at interpreter exit.
  atexit.register(print_sorted_initialization_times)
  # Flag read by gsutil to enable its import-time measurement hooks.
  gsutil.MEASURING_TIME_ACTIVE = True
  gsutil.RunMain()
github mitodl / edx2bigquery / edx2bigquery / make_research_data_tables.py View on Github external
if not course_id_set:
            print "ERROR! Must specify list of course_id's for report.  Aborting."
            return

        org = course_id_set[0].split('/',1)[0]	# extract org from first course_id
        self.org = org

        self.output_project_id = output_project_id

        crname = ('course_report_%s' % org)
        if use_dataset_latest:
            crname = 'course_report_latest'
        self.dataset = output_dataset_id or crname

        self.gsbucket = gsutil.gs_path_from_course_id(crname, gsbucket=output_bucket)
        self.course_id_set = course_id_set
	course_id = course_id_set

        #course_datasets = [ bqutil.course_id2dataset(x, use_dataset_latest=use_dataset_latest) for x in course_id_set]
        #course_datasets_dict = { x:bqutil.course_id2dataset(x, use_dataset_latest=use_dataset_latest) for x in course_id_set}
	course_dataset = bqutil.course_id2dataset( course_id, use_dataset_latest=use_dataset_latest )

	self.rdp_matrix = collections.OrderedDict()
        #for course_id in course_datasets_dict.keys():

	print "[researchData] Processing data for course %s" % ( course_id )
	sys.stdout.flush()
	for rdp in RESEARCH_DATA_PRODUCTS.keys():
		try:
			table = bqutil.get_bq_table_info( course_dataset, rdp )
			#table = bqutil.get_bq_table_info( course_id, rdp )
github mitodl / edx2bigquery / edx2bigquery / make_course_report_tables.py View on Github external
if not course_id_set:
            print "ERROR! Must specify list of course_id's for report.  Aborting."
            return

        org = course_id_set[0].split('/',1)[0]	# extract org from first course_id
        self.org = org

        self.output_project_id = output_project_id

        crname = ('course_report_%s' % org)
        if use_dataset_latest:
            crname = 'course_report_latest'
        self.dataset = output_dataset_id or crname

        self.gsbucket = gsutil.gs_path_from_course_id(crname, gsbucket=output_bucket)
        self.course_id_set = course_id_set

        course_datasets = [ bqutil.course_id2dataset(x, use_dataset_latest=use_dataset_latest) for x in course_id_set]

        # check to see which datasets have person_course tables
        datasets_with_pc = []
        self.all_pc_tables = OrderedDict()
        self.all_pcday_ip_counts_tables = OrderedDict()
        self.all_pcday_trlang_counts_tables = OrderedDict()
        self.all_uic_tables = OrderedDict()
        self.all_ca_tables = OrderedDict()
        self.all_va_tables = OrderedDict()
        self.all_tott_tables = OrderedDict()
        for cd in course_datasets:
            try:
                table = bqutil.get_bq_table_info(cd, 'person_course')