How to use the genomepy.search function in genomepy

To help you get started, we’ve selected a few genomepy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: vanheeringen-lab/genomepy, file genomepy/cli.py (view on GitHub)
def search(term, provider=None):
    """
    Search for genomes that contain TERM in their name or description.

    Function is case-insensitive. Spaces in TERM can be replaced with underscores
    (_) or TERM can be "quoted", e.g., "homo sapiens".
    """
    # The first entry is the header row; every search hit is appended after it.
    data = [["name", "provider", "accession", "species", "tax_id", "other_info"]]
    for row in genomepy.search(term, provider):
        # Each field is decoded as UTF-8, silently dropping undecodable bytes.
        # Presumably genomepy.search yields rows of bytes values; confirm
        # against the library version in use.
        data.append([x.decode("utf-8", "ignore") for x in row])
    # Only the header is present, so the search produced no rows. The message
    # goes to stderr so that stdout stays empty for piped consumers.
    if len(data) == 1:
        print("No genomes found!", file=sys.stderr)
        return

    # In case we print to a terminal, the output is aligned.
    # Otherwise (file, pipe) we use tab-separated columns.
    if sys.stdout.isatty():
        # Column width = longest cell in that column (header included) plus
        # four characters of padding.
        sizes = [max(len(row[i]) + 4 for row in data) for i in range(len(data[0]))]
        # Produces one "{: <N}" field per column: left-aligned, space-padded.
        fstring = "".join([f"{{: <{size}}}" for size in sizes])
    else:
        # One bare "{}" placeholder per column, joined by tabs.
        fstring = "\t".join(["{}" for _ in range(len(data[0]))])

    for i, row in enumerate(data):
        if i == 0:
            # The header row is prefixed with Style.BRIGHT.
            # NOTE(review): this excerpt ends here; no branch printing the
            # remaining (non-header) rows is visible, so the snippet is
            # presumably truncated. Check the upstream file.
            print(Style.BRIGHT + fstring.format(*row))
Source: vanheeringen-lab/genomepy, file genomepy/annotation.py (view on GitHub)
logger.info("Loading chromosome mapping.")
    if to.startswith("GCA"):
        if provider is None:
            raise ValueError("Need a provider: NCBI, UCSC or Ensembl")
        asm_acc = to
    else:
        try:
            genome = Genome(to)
            logger.info("Using local genome information")
            asm_acc = genome.assembly_accession
            if provider is None:
                provider = genome.provider
        except Exception:
            logger.info("Searching remote genome information")
            result = [row for row in search(to, provider=provider)]
            if len(result) > 1:
                p = [row[1].decode() for row in result]
                raise ValueError(
                    f"More than one result, need one of these providers: {', '.join(p)}"
                )
            if provider is None:
                provider = result[0][1].decode()
            asm_acc = result[0][2].decode()

    logger.info(f"Assembly {asm_acc}, provider {provider}")

    if provider not in ["UCSC", "NCBI", "Ensembl"]:
        logger.error(f"Can't map to provider {provider}")
        return None

    asm_report = ncbi_assembly_report(asm_acc)