How to use the kipoi.data.kipoi_dataloader function in kipoi

To help you get started, we’ve selected a few kipoi examples that show popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: kipoi/kipoiseq on GitHub, file kipoiseq/dataloaders/sequence.py (View on GitHub, external link)
if self.ignore_targets or self.n_tasks == 0:
            labels = {}
        else:
            labels = row.iloc[self.bed_columns:].values.astype(
                self.label_dtype)
        return interval, labels

    def __len__(self):
        """Return the length of the underlying ``self.df`` table."""
        n_entries = len(self.df)
        return n_entries

    def get_targets(self):
        """Return every label column of ``self.df`` as a single array.

        The label columns are all columns from positional index
        ``self.bed_columns`` onward; the resulting values are cast to
        ``self.label_dtype`` before being returned.
        """
        # Everything to the right of the leading bed columns is a label.
        label_frame = self.df.iloc[:, self.bed_columns:]
        raw_labels = label_frame.values
        return raw_labels.astype(self.label_dtype)


@kipoi_dataloader(override={"dependencies": deps, 'info.authors': package_authors})
class StringSeqIntervalDl(Dataset):
    """
    info:
        doc: >
           Dataloader for a combination of fasta and tab-delimited input files such as bed files. The dataloader extracts
           regions from the fasta file as defined in the tab-delimited `intervals_file`. Returned sequences are of the type
           np.array([str]).
    args:
        intervals_file:
            doc: bed3+ file path containing intervals + (optionally) labels
            example:
              url: https://raw.githubusercontent.com/kipoi/kipoiseq/master/tests/data/intervals_51bp.tsv
              md5: a76e47b3df87fd514860cf27fdc10eb4
        fasta_file:
            doc: Reference genome FASTA file path.
            example:
Source: kipoi/kipoiseq on GitHub, file kipoiseq/dataloaders/splicing.py (View on GitHub, external link)
continue
        for exon in gtf_db.children(gene, featuretype='exon'):
            isLast = False  # track whether is last exon
            if firstLastNoExtend:
                if (gene.strand == "+" and exon.end == gene.end) or (gene.strand == "-" and exon.start == gene.start):
                    overhang = (overhang[0], 0)
                    isLast = True
                elif (gene.strand == "+" and exon.start == gene.start) or (gene.strand == "-" and exon.end == gene.end):
                    overhang = (0, overhang[1])
            exon = ExonInterval.from_feature(exon, overhang)
            exon.isLast = isLast
            overhang = default_overhang
            yield exon


@kipoi_dataloader(override={"dependencies": deps, 'info.authors': package_authors})
class MMSpliceDl(SampleIterator):
    """
    info:
        doc: >
            Dataloader for splicing models. With inputs as gtf annotation file and fasta file,
            each output is an exon sequence with flanking intronic seuqences. Intronic sequnce
            lengths specified by the users. Returned sequences are of the type np.array([str])
    args:
        gtf_file:
            doc: file path; Genome annotation GTF file
            example:
                url: https://raw.githubusercontent.com/kipoi/models/master/MMSplice/tests/data/test.gtf
                md5: b20607afe91ec20d6ee79ed95ab0e85b
        fasta_file:
            doc: Reference Genome sequence in fasta format
            example: