How to use the genomepy.utils.get_localname function in genomepy

To help you get started, we’ve selected a few genomepy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github vanheeringen-lab / genomepy / tests / test_04_download_annotation.py View on Github external
def test_UCSC_annotation(localname=None):
    """Test UCSC annotation

    Downloads the annotation for the "sacCer3" assembly through the UCSC
    provider into a temporary directory, then checks the resulting
    ``.annotation.gtf.gz`` and ``.annotation.bed.gz`` files.

    Parameters
    ----------
    localname : str , optional
        Custom name handed to the provider; the directory and file prefix
        are resolved with ``genomepy.utils.get_localname(name, localname)``.
    """
    tmp = mkdtemp()
    try:
        p = genomepy.provider.ProviderBase.create("UCSC")
        name = "sacCer3"

        p.download_annotation(name, tmp, localname=localname)

        localname = genomepy.utils.get_localname(name, localname)
        gtf = os.path.join(tmp, localname, localname + ".annotation.gtf.gz")
        validate_gzipped_gtf(gtf)

        bed = os.path.join(tmp, localname, localname + ".annotation.bed.gz")
        validate_gzipped_bed(bed)
    finally:
        # Remove the temporary directory even when the download or a
        # validation step raises, so failed runs do not leave files behind.
        shutil.rmtree(tmp)
github vanheeringen-lab / genomepy / tests / test_04_download_annotation.py View on Github external
def test_ensembl_annotation(localname=None):
    """Test Ensembl annotation

    This annotation is hosted on https://ftp.ensembl.org.

    For every (name, version) pair the annotation is downloaded through the
    Ensembl provider into a temporary directory, and the resulting
    ``.annotation.gtf.gz`` and ``.annotation.bed.gz`` files are checked.

    Parameters
    ----------
    localname : str , optional
        Custom name handed to the provider; the directory and file prefix
        are resolved with ``genomepy.utils.get_localname(name, localname)``.
    """
    tmp = mkdtemp()
    try:
        p = genomepy.provider.ProviderBase.create("Ensembl")

        # Only test on vertebrates as these are downloaded over HTTPS.
        # All others are downloaded over FTP, which is unreliable on Travis.
        for name, version in [("KH", 98)]:
            p.download_annotation(name, tmp, localname=localname, version=version)

            # Keep the resolved name in its own variable: rebinding the
            # ``localname`` argument here would make every later iteration
            # reuse the first genome's name.
            resolved = genomepy.utils.get_localname(name, localname)
            gtf = os.path.join(tmp, resolved, resolved + ".annotation.gtf.gz")
            validate_gzipped_gtf(gtf)

            bed = os.path.join(tmp, resolved, resolved + ".annotation.bed.gz")
            validate_gzipped_bed(bed)
    finally:
        # Remove the temporary directory even when the download or a
        # validation step raises, so failed runs do not leave files behind.
        shutil.rmtree(tmp)
github vanheeringen-lab / genomepy / tests / test_04_download_annotation.py View on Github external
def test_url_annotation(localname=None):
    """Test url annotation

    Downloads the annotation belonging to a genome fasta URL through the URL
    provider into a temporary directory, then checks the resulting
    ``.annotation.gtf.gz`` and ``.annotation.bed.gz`` files.

    Parameters
    ----------
    localname : str , optional
        Custom name handed to the provider; the directory and file prefix
        are resolved with ``genomepy.utils.get_localname(name, localname)``.
    """
    tmp = mkdtemp()
    try:
        p = genomepy.provider.ProviderBase.create("URL")
        name = "http://ftp.xenbase.org/pub/Genomics/JGI/Xentr9.1/XT9_1.fa.gz"

        p.download_annotation(name, tmp, localname=localname)

        localname = genomepy.utils.get_localname(name, localname)
        gtf = os.path.join(tmp, localname, localname + ".annotation.gtf.gz")
        validate_gzipped_gtf(gtf)

        bed = os.path.join(tmp, localname, localname + ".annotation.bed.gz")
        validate_gzipped_bed(bed)
    finally:
        # Remove the temporary directory even when the download or a
        # validation step raises, so failed runs do not leave files behind.
        shutil.rmtree(tmp)
github vanheeringen-lab / genomepy / genomepy / provider.py View on Github external
def search_url_for_annotation(url):
        """Attempts to find a gtf or gff3 file in the same location as the genome url"""
        urldir = os.path.dirname(url)
        sys.stderr.write(
            "You have requested gene annotation to be downloaded.\n"
            "Genomepy will check the remote directory:\n"
            f"{urldir} for annotation files...\n"
        )

        # try to find a GTF or GFF3 file
        name = get_localname(url)
        with urlopen(urldir) as f:
            for urlline in f.readlines():
                urlstr = str(urlline)
                if any(
                    substring in urlstr.lower() for substring in [".gtf", name + ".gff"]
                ):
                    break

        # retrieve the filename from the HTML line
        fname = ""
        for split in re.split('>|<|><|/|"', urlstr):
            if split.lower().endswith(
                (
                    ".gtf",
                    ".gtf.gz",
                    name + ".gff",
github vanheeringen-lab / genomepy / genomepy / provider.py View on Github external
url of where to download genome from

        genomes_dir : str
            Directory to install annotation

        localname : str , optional
            Custom name for your genome

        kwargs: dict , optional:
            Provider specific options.

            to_annotation : str , optional
                url to annotation file (only required if this not located in the same directory as the fasta)
        """
        name = get_localname(url)
        localname = get_localname(name, localname)

        if kwargs.get("to_annotation"):
            link = self.get_annotation_download_link(None, **kwargs)
        else:
            link = self.search_url_for_annotation(url)

        self.attempt_and_report(name, localname, link, genomes_dir)
github vanheeringen-lab / genomepy / genomepy / functions.py View on Github external
with the (first header fields of) the genome.fa will not be corrected.

    kwargs : dict , optional
        Provider specific options.
        toplevel : bool , optional
            Ensembl only: Always download the toplevel genome. Ignores potential primary assembly.

        version : int , optional
            Ensembl only: Specify release version. Default is latest.

        to_annotation : text , optional
            URL only: direct link to annotation file.
            Required if this is not the same directory as the fasta.
    """
    genomes_dir = get_genomes_dir(genomes_dir, check_exist=False)
    localname = get_localname(name, localname)
    out_dir = os.path.join(genomes_dir, localname)

    # Check if genome already exists, or if downloading is forced
    genome_found = (
        len([f for f in glob_ext_files(out_dir) if f"{localname}.fa" in f]) >= 1
    )
    if (not genome_found or force) and not only_annotation:
        # Download genome from provider
        p = ProviderBase.create(provider)
        p.download_genome(
            name,
            genomes_dir,
            mask=mask,
            regex=regex,
            invert_match=invert_match,
            localname=localname,
github vanheeringen-lab / genomepy / genomepy / genome.py View on Github external
def _parse_name(name):
        """extract a safe name from file path, url or regular names

        The input is first passed through ``get_localname``; a trailing
        ``.fa`` or ``.fa.gz`` extension is then removed and only the final
        path component is returned.
        """
        # Raw string with escaped dots: an unescaped "." matches any
        # character, which would also strip e.g. the "ofa" from "sofa".
        return os.path.basename(re.sub(r"\.fa(\.gz)?$", "", get_localname(name)))