How to use the `zarr.open_group` function from the `zarr` Python package

To help you get started, we've selected a few `zarr` examples based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Example from GitHub: aimbrain/vqa-project — data/preprocess_image.py (view on GitHub)
# NOTE(review): truncated scrape — the enclosing `def` header is missing and
# the final `np.frombuffer(...)` call is cut off mid-statement.
# Column names of the Faster R-CNN feature TSV files.
FIELDNAMES = ['image_id', 'image_w', 'image_h',
                  'num_boxes', 'boxes', 'features']

    # Pick the input TSV file(s) for the requested dataset split.
    if phase == 'trainval':
        infiles = [
            'raw/trainval_36/trainval_resnet101_faster_rcnn_genome_36.tsv',
        ]
    elif phase == 'test':
        infiles = [
            'raw/test2015_36/test2015_resnet101_faster_rcnn_genome_36.tsv',
        ]
    else:
        raise SystemExit('Unrecognised phase')

    # Read the tsv and append to files
    # mode='w' truncates any existing group at these paths.
    boxes = zarr.open_group(phase + '_boxes.zarr', mode='w')
    features = zarr.open_group(phase + '.zarr', mode='w')
    image_size = {}
    for infile in infiles:
        with open(infile, "r") as tsv_in_file:
            reader = csv.DictReader(
                tsv_in_file, delimiter='\t', fieldnames=FIELDNAMES)
            print('Converting ' + infile + ' to zarr...')
            for item in tqdm(reader):
                # Normalise the scalar fields parsed from the TSV row.
                item['image_id'] = str(item['image_id'])
                item['image_h'] = int(item['image_h'])
                item['image_w'] = int(item['image_w'])
                item['num_boxes'] = int(item['num_boxes'])
                for field in ['boxes', 'features']:
                    # NOTE(review): base64.decodestring was deprecated and
                    # removed in Python 3.9 — base64.decodebytes is the
                    # drop-in replacement.
                    encoded_str = base64.decodestring(
                        item[field].encode('utf-8'))
                    item[field] = np.frombuffer(encoded_str,
Example from GitHub: aimbrain/vqa-project — data/preprocess_image.py (view on GitHub)
'num_boxes', 'boxes', 'features']
# NOTE(review): truncated scrape — the enclosing `def` header and the start
# of the FIELDNAMES list (continued on the line above) are missing.

    # Pick the input TSV file(s) for the requested dataset split.
    if phase == 'trainval':
        infiles = [
            'raw/trainval_36/trainval_resnet101_faster_rcnn_genome_36.tsv',
        ]
    elif phase == 'test':
        infiles = [
            'raw/test2015_36/test2015_resnet101_faster_rcnn_genome_36.tsv',
        ]
    else:
        raise SystemExit('Unrecognised phase')

    # Read the tsv and append to files
    # mode='w' truncates any existing group at these paths.
    boxes = zarr.open_group(phase + '_boxes.zarr', mode='w')
    features = zarr.open_group(phase + '.zarr', mode='w')
    image_size = {}
    for infile in infiles:
        with open(infile, "r") as tsv_in_file:
            reader = csv.DictReader(
                tsv_in_file, delimiter='\t', fieldnames=FIELDNAMES)
            print('Converting ' + infile + ' to zarr...')
            for item in tqdm(reader):
                # Normalise the scalar fields parsed from the TSV row.
                item['image_id'] = str(item['image_id'])
                item['image_h'] = int(item['image_h'])
                item['image_w'] = int(item['image_w'])
                item['num_boxes'] = int(item['num_boxes'])
                for field in ['boxes', 'features']:
                    # NOTE(review): base64.decodestring was deprecated and
                    # removed in Python 3.9 — base64.decodebytes is the
                    # drop-in replacement.
                    encoded_str = base64.decodestring(
                        item[field].encode('utf-8'))
                    # One row per box: (num_boxes, feature_dim) float32.
                    item[field] = np.frombuffer(encoded_str,
                                                dtype=np.float32).reshape((item['num_boxes'], -1))
Example from GitHub: veugene/data_tools — data_tools/io.py (view on GitHub)
'dtype': dtype}
        # NOTE(review): truncated scrape — the method header and the start of
        # the arr_kwargs dict (continued on the line above) are missing.
        # When the total length is unknown, start with a single-element
        # leading dimension — presumably grown on append; TODO confirm
        # against the full file.
        if self.length is None:
            self.arr_kwargs['shape'] = (1,)+self.data_element_shape
        else:
            self.arr_kwargs['shape'] = (self.length,)+self.data_element_shape
        if kwargs is not None:
            self.arr_kwargs.update(kwargs)
    
        # Open the file for writing.
        # 'a' preserves existing contents; 'w' truncates the store.
        self.group = None
        if append:
            self.write_mode = 'a'
        else:
            self.write_mode = 'w'
        try:
            self.group = zarr.open_group(filename, self.write_mode)
        except:
            # NOTE(review): bare `except:` also catches KeyboardInterrupt and
            # SystemExit; it does re-raise, but `except Exception:` would be
            # safer here.
            print("Error: failed to open file %s" % filename)
            raise
        
        # Open an array interface (check if the array exists; if not, create it)
        if self.length is None:
            ds_args = (self.array_name, (1,)+self.data_element_shape)
        else:
            ds_args = (self.array_name, (self.length,)+self.data_element_shape)
        # NOTE(review): ds_args is built above but never passed to
        # create_dataset below — looks unused; verify against the full file.
        try:
            self.storage_array = self.group[self.array_name]
            # Resume appending after any data already in the array.
            self.storage_array_ptr = len(self.storage_array)
        except KeyError:
            self.storage_array = self.group.create_dataset(**self.arr_kwargs)
            self.storage_array_ptr = 0
Example from GitHub: calico/basenji — bin/basenji_sad_multi.py (view on GitHub)
def collect_zarr(file_name, out_dir, num_procs):
  """Merge per-process Zarr outputs into a single Zarr group.

  Each process pi wrote its results to out_dir/job<pi>/file_name; the
  merged result lands at out_dir/file_name. Shared metadata keys are
  kept from job 0, '_pct' keys are averaged, and everything else is
  concatenated along the first axis.
  """
  final_zarr_file = '%s/%s' % (out_dir, file_name)

  # Seed the merged store by copying job 0's output wholesale.
  shutil.copytree('%s/job0/%s' % (out_dir, file_name), final_zarr_file)

  final_zarr_open = zarr.open_group(final_zarr_file)

  for pi in range(1, num_procs):
    # Read-only open of this job's output.
    job_zarr_open = zarr.open_group('%s/job%d/%s' % (out_dir, pi, file_name), 'r')

    for key in final_zarr_open.keys():
      if key in ['percentiles', 'target_ids', 'target_labels']:
        # Shared metadata — already present from job 0.
        continue

      if key[-4:] == '_pct':
        # Incremental running mean over jobs 0..pi.
        running = np.array(final_zarr_open[key])
        sample = np.array(job_zarr_open[key])
        final_zarr_open[key] = running + (sample - running) / (pi+1)
      else:
        # Concatenate this job's rows onto the merged array.
        final_zarr_open[key].append(job_zarr_open[key])
Example from GitHub: calico/basenji — bin/basenji_sadq_multi.py (view on GitHub)
def collect_zarr(file_name, out_dir, num_procs):
  """Merge per-process Zarr outputs (out_dir/job<pi>/file_name) into a
  single Zarr group at out_dir/file_name.

  NOTE(review): this snippet is truncated by the scrape — the running-mean
  assignment for '_pct' keys and the append branch are cut off at the end.
  """
  final_zarr_file = '%s/%s' % (out_dir, file_name)

  # seed w/ job0
  job_zarr_file = '%s/job0/%s' % (out_dir, file_name)
  shutil.copytree(job_zarr_file, final_zarr_file)

  # open final
  final_zarr_open = zarr.open_group(final_zarr_file)

  for pi in range(1, num_procs):
    # open job
    job_zarr_file = '%s/job%d/%s' % (out_dir, pi, file_name)
    job_zarr_open = zarr.open_group(job_zarr_file, 'r')

    # append to final
    for key in final_zarr_open.keys():
      if key in ['percentiles', 'target_ids', 'target_labels']:
        # once is enough
        pass

      elif key[-4:] == '_pct':
        # average
        u_k1 = np.array(final_zarr_open[key])
        x_k = np.array(job_zarr_open[key])
Example from GitHub: calico/basenji — bin/basenji_sadf.py (view on GitHub)
def initialize_output_zarr(out_dir, sad_stats, snps, target_ids, target_labels):
  """Initialize an output Zarr file for SAD stats.

  Creates out_dir/sad.zarr (truncating any existing group), writes the
  SNP ids and target metadata, and pre-allocates one
  (num_snps x num_targets) float16 matrix per requested statistic.

  Returns the open Zarr group.
  """
  num_snps = len(snps)
  num_targets = len(target_ids)

  sad_out = zarr.open_group('%s/sad.zarr' % out_dir, 'w')

  # SNP identifiers (one rsid per SNP).
  rsids = [snp.rsid for snp in snps]
  sad_out.create_dataset('snp', data=rsids, chunks=(32768,))

  # Target metadata, stored uncompressed.
  sad_out.create_dataset('target_ids', data=target_ids, compressor=None)
  sad_out.create_dataset('target_labels', data=target_labels, compressor=None)

  # Pre-allocate one SNP-by-target matrix per SAD statistic.
  for sad_stat in sad_stats:
    sad_out.create_dataset(
        sad_stat,
        shape=(num_snps, num_targets),
        chunks=(128, num_targets),
        dtype='float16')

  return sad_out