How to use the genomepy.provider.ProviderBase.create function in genomepy

To help you get started, we’ve selected a few genomepy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github vanheeringen-lab / genomepy / tests / test_02_links.py View on Github external
def test_ncbi_genome_download_links(masking):
    """Request genome download links from the NCBI provider.

    Each assembly name below is looked up through
    ``get_genome_download_link``; the test has no explicit assertions and
    passes when none of the lookups raises.

    ``masking`` is forwarded as the ``mask`` keyword. Per the original note
    on this test, masking should be ignored for this provider.

    The genomes are noted as being hosted on ftp.ncbi.nlm.nih.gov.
    """
    ncbi = genomepy.provider.ProviderBase.create("NCBI")

    assemblies = ["Charlie1.0", "GRCh38.p13"]
    for assembly in assemblies:
        ncbi.get_genome_download_link(assembly, mask=masking)
github vanheeringen-lab / genomepy / tests / test_04_download_annotation.py View on Github external
def test_ensemblgenomes_annotation(localname=None):
    """Download an Ensembl annotation and validate the resulting files.

    The annotation is noted as being hosted on ftp.ensemblgenomes.org.

    Parameters
    ----------
    localname : str, optional
        Custom local name passed to ``download_annotation`` and to
        ``genomepy.utils.get_localname`` when building the expected paths.

    For every (name, version) pair the annotation is downloaded into a
    scratch directory, after which the gzipped GTF and BED files are
    validated. The scratch directory is removed even when the download or
    a validation step fails.
    """
    tmp = mkdtemp()
    try:
        p = genomepy.provider.ProviderBase.create("Ensembl")

        for name, version in [("TAIR10", 45)]:
            p.download_annotation(name, tmp, localname=localname, version=version)

            # Resolve into a separate variable so the ``localname`` argument
            # is not overwritten and reused by a later iteration.
            resolved = genomepy.utils.get_localname(name, localname)
            gtf = os.path.join(tmp, resolved, resolved + ".annotation.gtf.gz")
            validate_gzipped_gtf(gtf)

            bed = os.path.join(tmp, resolved, resolved + ".annotation.bed.gz")
            validate_gzipped_bed(bed)
    finally:
        # mkdtemp() leaves cleanup to the caller; do it on failure as well.
        shutil.rmtree(tmp)
github vanheeringen-lab / genomepy / genomepy / annotation.py View on Github external
# Fast lookup for some common queries
    common_names = {
        "danRer11": "GRCz11",
        "hg38": "GRCh38",
        "mm10": "GRCm38",
        "dm6": "BDGP6.28",
    }
    if genome_name in common_names:
        search_term = common_names[genome_name]
    else:
        try:
            genome = Genome(genome_name)
            search_term = genome.tax_id
        except FileNotFoundError:
            logger.info(f"Genome {genome_name} not installed locally")
            p = ProviderBase.create("Ensembl")
            for name, *_rest in p.search(genome_name):
                if name == genome_name:
                    logger.info(
                        f"It can be downloaded from Ensembl: genomepy install {name} Ensembl --annotation"
                    )
                    return None
            return None

    # search Ensembl by taxonomy_id or by specific Ensembl name (if we know it)
    p = ProviderBase.create("Ensembl")
    name, accession, species, tax_id, *rest = [row for row in p.search(search_term)][0]

    # Check if the assembly_id of the current Ensembl genome is the same as the
    # local genome. If it is identical, we can correctly assume that the genomes
    # sequences are identical.
    # For the genomes in the lookup table, we already know they match.
github vanheeringen-lab / genomepy / genomepy / functions.py View on Github external
to_annotation : text , optional
            URL only: direct link to annotation file.
            Required if this is not the same directory as the fasta.
    """
    genomes_dir = get_genomes_dir(genomes_dir, check_exist=False)
    localname = get_localname(name, localname)
    out_dir = os.path.join(genomes_dir, localname)

    # Check if genome already exists, or if downloading is forced
    genome_found = (
        len([f for f in glob_ext_files(out_dir) if f"{localname}.fa" in f]) >= 1
    )
    if (not genome_found or force) and not only_annotation:
        # Download genome from provider
        p = ProviderBase.create(provider)
        p.download_genome(
            name,
            genomes_dir,
            mask=mask,
            regex=regex,
            invert_match=invert_match,
            localname=localname,
            bgzip=bgzip,
            **kwargs,
        )
        genome_found = True

        # Export installed genome(s)
        generate_env()

    # Generates a Fasta object, index, gaps and sizes file
github vanheeringen-lab / genomepy / genomepy / functions.py View on Github external
term : str
        Search term, case-insensitive.

    provider : str , optional
        Provider name

    Yields
    ------
    tuple
        genome information (name/identifier and description)
    """
    if provider:
        providers = [ProviderBase.create(provider)]
    else:
        # if provider is not specified search all providers
        providers = [ProviderBase.create(p) for p in ProviderBase.list_providers()]
    for p in providers:
        for row in p.search(term):
            yield [
                x.encode("latin-1") for x in list(row[:1]) + [p.name] + list(row[1:])
            ]