How to use the toil.lib.compatibility.compat_bytes function in toil

To help you get started, we’ve selected a few toil examples based on popular ways compat_bytes is used in public projects.

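Every snippet below funnels a job store or file ID through compat_bytes before handing it to the AWS (boto) or Google Cloud Storage client as a key or blob name. As a rough orientation, here is a minimal standalone sketch of that pattern; the behaviour assumed for compat_bytes (str IDs pass through unchanged, bytes IDs are decoded to a native str) and the example ID are assumptions, not taken from the snippets.

from toil.lib.compatibility import compat_bytes

# Hypothetical ID; compat_bytes is assumed to return a native str whether the
# ID arrives as bytes or as str, so SDK calls always receive a string key name.
file_id_as_bytes = b"files/instance-abc123"
file_id_as_str = "files/instance-abc123"

key_name = compat_bytes(file_id_as_bytes)
assert key_name == compat_bytes(file_id_as_str)
# key_name can now be used as an S3 key name or a GCS blob name, as the
# examples below do inline.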

github DataBiosphere / toil / src / toil / jobStores / aws / utils.py
def uploadFromPath(localFilePath, partSize, bucket, fileID, headers):
    """
    Uploads a file to s3, using multipart uploading if applicable

    :param str localFilePath: Path of the file to upload to s3
    :param int partSize: max size of each part in the multipart upload, in bytes
    :param boto.s3.Bucket bucket: the s3 bucket to upload to
    :param str fileID: the name (key) under which to store the file in the bucket
    :param headers: http headers to use when uploading - generally used for encryption purposes
    :return: version of the newly uploaded file
    """
    file_size, file_time = fileSizeAndTime(localFilePath)
    if file_size <= partSize:
        key = bucket.new_key(key_name=compat_bytes(fileID))
        key.name = fileID
        for attempt in retry_s3():
            with attempt:
                key.set_contents_from_filename(localFilePath, headers=headers)
        version = key.version_id
    else:
        with open(localFilePath, 'rb') as f:
            version = chunkedFileUpload(f, bucket, fileID, file_size, headers, partSize)
    for attempt in retry_s3():
        with attempt:
            key = bucket.get_key(compat_bytes(fileID),
                                 headers=headers,
                                 version_id=version)
    assert key.size == file_size
    # Make reasonably sure that the file wasn't touched during the upload
    assert fileSizeAndTime(localFilePath) == (file_size, file_time)
    return version
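The docstring above already lists the parameters; purely as an illustration, a call could look like the sketch below. The boto 2-style connection, region, bucket name, file path, and fileID are all made up.

import boto.s3

# Hypothetical setup: region, bucket name, path, and fileID are invented for
# this sketch; the bucket is a boto 2-style boto.s3 bucket, as the docstring expects.
conn = boto.s3.connect_to_region("us-west-2")
bucket = conn.get_bucket("my-toil-jobstore")

version = uploadFromPath("/tmp/reads.bam",
                         partSize=50 << 20,          # 50 MiB parts
                         bucket=bucket,
                         fileID="files/instance-abc123",
                         headers={})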

github DataBiosphere / toil / src / toil / jobStores / aws / jobStore.py
            # Conditionally write this file's SimpleDB record, then delete the
            # superseded S3 key version and any leftover content-chunk attributes.
            try:
                for attempt in retry_sdb():
                    with attempt:
                        assert self.outer.filesDomain.put_attributes(item_name=compat_bytes(self.fileID),
                                                                     attributes=attributes,
                                                                     expected_value=expected)
                # clean up the old version of the file if necessary and safe
                if self.previousVersion and (self.previousVersion != self.version):
                    for attempt in retry_s3():
                        with attempt:
                            self.outer.filesBucket.delete_key(compat_bytes(self.fileID),
                                                              version_id=self.previousVersion)
                self._previousVersion = self._version
                if numNewContentChunks < self._numContentChunks:
                    residualChunks = range(numNewContentChunks, self._numContentChunks)
                    attributes = [self._chunkName(i) for i in residualChunks]
                    for attempt in retry_sdb():
                        with attempt:
                            self.outer.filesDomain.delete_attributes(compat_bytes(self.fileID),
                                                                     attributes=attributes)
                self._numContentChunks = numNewContentChunks
            except SDBResponseError as e:
                if e.error_code == 'ConditionalCheckFailed':
                    raise ConcurrentFileModificationException(self.fileID)
                else:
                    raise

github DataBiosphere / toil / src / toil / jobStores / googleJobStore.py
def delete(self, jobStoreID):
        self._delete(jobStoreID)

        # best effort delete associated files
        for blob in self.bucket.list_blobs(prefix=compat_bytes(jobStoreID)):
            self._delete(blob.name)

github DataBiosphere / toil / src / toil / jobStores / aws / utils.py
def chunkedFileUpload(readable, bucket, fileID, file_size, headers=None, partSize=50 << 20):
    for attempt in retry_s3():
        with attempt:
            upload = bucket.initiate_multipart_upload(
                key_name=compat_bytes(fileID),
                headers=headers)
    try:
        start = 0
        part_num = itertools.count()
        while start < file_size:
            end = min(start + partSize, file_size)
            assert readable.tell() == start
            for attempt in retry_s3():
                with attempt:
                    upload.upload_part_from_file(fp=readable,
                                                 part_num=next(part_num) + 1,
                                                 size=end - start,
                                                 headers=headers)
            start = end
        assert readable.tell() == file_size == start
    except:
        # If any part fails, abort the multipart upload so partial parts are
        # not left behind, then re-raise the error.
        for attempt in retry_s3():
            with attempt:
                upload.cancel_upload()
        raise
    else:
        # All parts uploaded: complete the upload and record the new object's version.
        for attempt in retry_s3():
            with attempt:
                version = upload.complete_upload().version_id
    return version
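A call to chunkedFileUpload mirrors the uploadFromPath sketch above, except that the caller opens the file and supplies its size; again the bucket, path, and fileID are hypothetical.

import os

# Hypothetical usage: 'bucket' is the boto 2-style bucket from the earlier
# sketch; the path and fileID are made up.
with open("/tmp/reads.bam", "rb") as readable:
    file_size = os.fstat(readable.fileno()).st_size
    version = chunkedFileUpload(readable,
                                bucket=bucket,
                                fileID="files/instance-abc123",
                                file_size=file_size,
                                partSize=50 << 20)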

github DataBiosphere / toil / src / toil / jobStores / googleJobStore.py
def fileExists(self, jobStoreFileID):
        return self.bucket.blob(compat_bytes(jobStoreFileID), encryption_key=self.sseKey).exists()

github DataBiosphere / toil / src / toil / jobStores / aws / jobStore.py
def exists(self, jobStoreID):
        for attempt in retry_sdb():
            with attempt:
                return bool(self.jobsDomain.get_attributes(
                    item_name=compat_bytes(jobStoreID),
                    attribute_name=[SDBHelper.presenceIndicator()],
                    consistent_read=True))

github DataBiosphere / toil / src / toil / jobStores / googleJobStore.py
def _delete(self, jobStoreFileID):
        if self.fileExists(jobStoreFileID):
            self.bucket.get_blob(compat_bytes(jobStoreFileID)).delete()
        # remember, this is supposed to be idempotent, so we don't do anything
        # if the file doesn't exist

github DataBiosphere / toil / src / toil / jobStores / aws / jobStore.py
            log.debug("Deleting %d file(s) associated with job %s", len(items), jobStoreID)
            n = self.itemsPerBatchDelete
            batches = [items[i:i + n] for i in range(0, len(items), n)]
            for batch in batches:
                itemsDict = {item.name: None for item in batch}
                for attempt in retry_sdb():
                    with attempt:
                        self.filesDomain.batch_delete_attributes(itemsDict)
            for item in items:
                version = item.get('version')
                for attempt in retry_s3():
                    with attempt:
                        if version:
                            self.filesBucket.delete_key(key_name=compat_bytes(item.name), version_id=version)
                        else:
                            self.filesBucket.delete_key(key_name=compat_bytes(item.name))

github DataBiosphere / toil / src / toil / jobStores / googleJobStore.py
def _writeFile(self, jobStoreID, fileObj, update=False, encrypt=True):
        blob = self.bucket.blob(compat_bytes(jobStoreID), encryption_key=self.sseKey if encrypt else None)
        if not update:
            # TODO: should probably raise a special exception and be added to all jobStores
            assert not blob.exists()
        else:
            if not blob.exists():
                raise NoSuchFileException(jobStoreID)
        blob.upload_from_file(fileObj)
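
The Google job store methods above all follow the same pattern: the ID goes through compat_bytes to become the blob name, optionally paired with a customer-supplied encryption key. A minimal standalone sketch of that pattern with the google-cloud-storage client follows; the bucket name and ID are made up.

from google.cloud import storage
from toil.lib.compatibility import compat_bytes

# Hypothetical bucket name and ID, used only to show the blob-name pattern.
client = storage.Client()
bucket = client.bucket("my-toil-google-jobstore")
blob = bucket.blob(compat_bytes(b"files/instance-abc123"))
print(blob.exists())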