How to use the torchmeta.datasets.tcga.TCGA class in torchmeta

To help you get started, we’ve selected a few torchmeta examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tristandeleu / pytorch-meta / torchmeta / datasets / tcga.py View on Github external
def get_task_variables():
    """Load the ``task_variables.json`` asset stored under ``TCGA.folder``.

    Returns:
        Whatever ``get_asset`` produces for that file when called with
        ``dtype='json'`` (presumably the decoded JSON content -- confirm
        against ``get_asset``).
    """
    asset_name = 'task_variables.json'
    task_variables = get_asset(TCGA.folder, asset_name, dtype='json')
    return task_variables
github tristandeleu / pytorch-meta / torchmeta / datasets / tcga.py View on Github external
def __init__(self, root, meta_train=False, meta_val=False, meta_test=False, meta_split=None,
                 min_samples_per_class=5, transform=None, target_transform=None,
                 dataset_transform=None, download=False, chunksize=100, preload=True):
        super(TCGA, self).__init__(meta_train, meta_val, meta_test, meta_split,
            target_transform=target_transform, dataset_transform=dataset_transform)
        self.root = os.path.join(os.path.expanduser(root), self.folder)
        self.min_samples_per_class = min_samples_per_class
        self.transform = transform

        self._all_sample_ids = None
        self._gene_ids = None
        self._tasks = None

        if download:
            self.download(chunksize)

        self.preloaded = False
        self.gene_expression_data = None
        self.gene_expression_file = None
        if preload:
github tristandeleu / pytorch-meta / torchmeta / datasets / tcga.py View on Github external
def from_id(cls, root, task_id, transform=None, target_transform=None):
    """Build a task dataset from a ``(label, cancer)`` task identifier.

    Reads the processed clinical matrix of ``cancer``, keeps the rows whose
    ``label`` value is not missing, converts that column to a categorical,
    and loads the corresponding rows of the ``expression_data`` dataset from
    the gene expression HDF5 file.

    Args:
        root: Directory that contains the ``TCGA.folder`` sub-directory.
            A leading ``~`` is expanded.
        task_id: Pair ``(label, cancer)``. ``label`` is the column selected
            from the clinical matrix; ``cancer`` is substituted into
            ``TCGA.clinical_matrix_filename`` to locate that matrix.
        transform: Forwarded unchanged to ``cls`` as ``transform``.
        target_transform: Forwarded unchanged to ``cls`` as
            ``target_transform``.

    Returns:
        ``cls(task_id, data, codes, categories, ...)`` where ``codes`` are
        the integer category codes of the label column and ``categories``
        the list of distinct label values.

    Raises:
        IOError: If the gene expression file or the processed clinical
            matrix for ``cancer`` does not exist. The message names the
            missing path.
    """
    # Function-scope import: pandas is loaded when this is called rather
    # than when the module is imported.
    import pandas as pd

    root = os.path.join(os.path.expanduser(root), TCGA.folder)
    gene_filepath = os.path.join(root, TCGA.gene_expression_filename)
    if not os.path.isfile(gene_filepath):
        raise IOError('Gene expression file not found: {0}'.format(
            gene_filepath))

    label, cancer = task_id

    processed_folder = os.path.join(root, 'clinicalMatrices', 'processed')
    filename = '{0}.tsv'.format(TCGA.clinical_matrix_filename.format(cancer))
    filepath = os.path.join(processed_folder, filename)
    if not os.path.isfile(filepath):
        raise IOError('Processed clinical matrix not found for task '
                      '{0!r}: {1}'.format(task_id, filepath))

    dataframe = pd.read_csv(filepath, sep='\t', index_col=0, header=0)
    # Rows without a value for this label are dropped before the column is
    # turned into a categorical.
    labels = dataframe[label].dropna().astype('category')

    # NOTE(review): assumes the clinical matrix index holds row positions
    # into ``expression_data`` -- confirm against the preprocessing step.
    with h5py.File(gene_filepath, 'r') as f:
        data = f['expression_data'][labels.index]

    return cls(task_id, data, labels.cat.codes.tolist(),
               labels.cat.categories.tolist(), transform=transform,
               target_transform=target_transform)
github tristandeleu / pytorch-meta / torchmeta / datasets / tcga.py View on Github external
def from_id(cls, root, task_id, transform=None, target_transform=None):
        import pandas as pd
        root = os.path.join(os.path.expanduser(root), TCGA.folder)
        gene_filepath = os.path.join(root, TCGA.gene_expression_filename)
        if not os.path.isfile(gene_filepath):
            raise IOError()

        label, cancer = task_id

        processed_folder = os.path.join(root, 'clinicalMatrices', 'processed')
        filename = '{0}.tsv'.format(TCGA.clinical_matrix_filename.format(cancer))
        filepath = os.path.join(processed_folder, filename)
        if not os.path.isfile(filepath):
            raise IOError()

        dataframe = pd.read_csv(filepath, sep='\t', index_col=0, header=0)
        labels = dataframe[label].dropna().astype('category')

        with h5py.File(gene_filepath, 'r') as f:
            data = f['expression_data'][labels.index]