How to use the genomepy.utils.check_url function in genomepy

To help you get started, we’ve selected a few examples of genomepy.utils.check_url, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github vanheeringen-lab / genomepy / genomepy / provider.py View on Github external
mask : str , optional
            Masking level. Options: soft, hard or none. Default is soft.

        Returns
        ------
        str with the http/ftp download link.
        """
        genome = self.genomes[safe(name)]

        # only soft masked genomes are available; they can be (un)masked in _post_process_download
        link = genome["ftp_path"]
        link = link.replace("ftp://", "https://")
        link += "/" + link.split("/")[-1] + "_genomic.fna.gz"

        if check_url(link):
            return link

        raise GenomeDownloadError(
            f"Could not download genome {name} from {self.name}.\n"
            "URL is broken. Select another genome or provider.\n"
github vanheeringen-lab / genomepy / genomepy / provider.py View on Github external
def get_annotation_download_link(self, name, **kwargs):
        """
        Build the link to the NCBI annotation (GFF) file and test it.

        Parameters
        ----------
        name : str
            Genome name

        Returns
        -------
        str or None
            The https link to the "_genomic.gff.gz" file when check_url
            accepts it, otherwise None.
        """
        # swap the ftp scheme of the stored path for https
        base_url = self.genomes[safe(name)]["ftp_path"].replace("ftp://", "https://")

        # the file name is prefixed with the last component of the path
        last_component = base_url.split("/")[-1]
        annotation_url = f"{base_url}/{last_component}_genomic.gff.gz"

        if not check_url(annotation_url):
            return None
        return annotation_url
github vanheeringen-lab / genomepy / genomepy / provider.py View on Github external
name + ".gff3",
                    name + ".gff3.gz",
                )
            ):
                fname = split
                break
        else:
            raise FileNotFoundError(
                "Could not parse the remote directory. "
                "Please supply a URL using --url-to-annotation.\n"
            )

        # set variables for downloading
        link = urldir + "/" + fname

        if check_url(link):
            return link
github vanheeringen-lab / genomepy / genomepy / provider.py View on Github external
"""
        Parse and test the link to the UCSC annotation file.

        Will check UCSC, Ensembl and RefSeq annotation, respectively.

        Parameters
        ----------
        name : str
            Genome name
        """
        ucsc_gene_url = f"http://hgdownload.cse.ucsc.edu/goldenPath/{name}/database/"
        annot_files = ["knownGene.txt.gz", "ensGene.txt.gz", "refGene.txt.gz"]

        for file in annot_files:
            link = ucsc_gene_url + file
            if check_url(link):
                return link
github vanheeringen-lab / genomepy / genomepy / provider.py View on Github external
def get_annotation_download_link(self, name, **kwargs):
        """
        Validate the annotation link passed via the "to_annotation" keyword.

        The link's extension (as reported by get_file_info) must be one of
        .gtf, .gff, .gff3 or .bed.

        Returns
        -------
        str or None
            The link when it was supplied and check_url accepts it,
            otherwise None.

        Raises
        ------
        TypeError
            If the link has an unsupported file extension.
        """
        annotation_url = kwargs.get("to_annotation")
        if not annotation_url:
            # nothing was supplied, so there is nothing to validate
            return None

        supported_extensions = (".gtf", ".gff", ".gff3", ".bed")
        extension = get_file_info(annotation_url)[0]
        if extension not in supported_extensions:
            raise TypeError("Only (gzipped) gtf, gff and bed files are supported.\n")

        return annotation_url if check_url(annotation_url) else None
github vanheeringen-lab / genomepy / genomepy / provider.py View on Github external
mask : str , optional
            Masking level. Options: soft, hard or none. Default is soft.

        Returns
        ------
        str with the http/ftp download link.
        """
        # soft masked genomes; they can be unmasked in _post_process_download
        urls = [self.ucsc_url, self.alt_ucsc_url]
        if mask == "hard":
            urls = [self.ucsc_url_masked, self.alt_ucsc_url_masked]

        for genome_url in urls:
            link = genome_url.format(name)

            if check_url(link):
                return link

        raise GenomeDownloadError(
            f"Could not download genome {name} from {self.name}.\n"
            "URLs are broken. Select another genome or provider.\n"
github vanheeringen-lab / genomepy / genomepy / provider.py View on Github external
asm_url = "{}/{}.{}.{}.fa.gz".format(
                url,
                genome["url_name"].capitalize(),
                re.sub(r"\.p\d+$", "", safe(genome["assembly_name"])),
                pattern,
            )
            return asm_url

        # try to get the (much smaller) primary assembly,
        # unless specified otherwise
        link = get_url("primary_assembly")
        if kwargs.get("toplevel") or not check_url(link):
            link = get_url()

        if check_url(link):
            return link

        raise GenomeDownloadError(
            f"Could not download genome {name} from {self.name}.\n"
            "URL is broken. Select another genome or provider.\n"
github vanheeringen-lab / genomepy / genomepy / provider.py View on Github external
if division != "vertebrates":
            ftp_site += f"/{division}"

        # Get the GTF URL
        base_url = ftp_site + "/release-{}/gtf/{}/{}.{}.{}.gtf.gz"
        safe_name = re.sub(r"\.p\d+$", "", name)
        link = base_url.format(
            version,
            genome["url_name"].lower(),
            genome["url_name"].capitalize(),
            safe_name,
            version,
        )

        if check_url(link):
            return link