How to use the genomepy.utils.filter_fasta function in genomepy

To help you get started, we’ve selected a few genomepy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: vanheeringen-lab/genomepy, tests/test_genomepy.py (view on GitHub)
def test_regexp_filter():
    """Check the sequence counts produced by ``genomepy.utils.filter_fasta``.

    Each entry of ``regexps`` is ``(pattern, match, no_match)``: the number of
    sequences expected in the filtered output of
    ``tests/data/regexp/regexp.fa`` when called with ``v=False`` and with
    ``v=True`` respectively.
    """
    # Function-scope imports so the test does not rely on module-level names.
    import os
    from tempfile import TemporaryDirectory

    fname = "tests/data/regexp/regexp.fa"

    regexps = [
        ("Chr.*", 2, 15),
        ("Scaffold.*", 1, 16),
        ("scaffold_.*", 3, 14),
        (r"^\d+$", 4, 13),
        ("chr.*", 4, 13),
    ]

    # Write the output inside a private temporary directory. Keeping only the
    # ``.name`` of a NamedTemporaryFile lets the file object be collected (and
    # the file deleted) immediately, which leaves an unreserved path and no
    # cleanup for whatever is later written there.
    with TemporaryDirectory() as tmpdir:
        tmpfa = os.path.join(tmpdir, "filtered.fa")
        for regex, match, no_match in regexps:
            # force=True because the same output path is reused on every call.
            fa = genomepy.utils.filter_fasta(
                fname, tmpfa, regex=regex, v=False, force=True
            )
            assert len(fa.keys()) == match
            fa = genomepy.utils.filter_fasta(
                fname, tmpfa, regex=regex, v=True, force=True
            )
            assert len(fa.keys()) == no_match
Source: vanheeringen-lab/genomepy, tests/test_genomepy.py (view on GitHub)
def test_regexp_filter():
    """Check the sequence counts produced by ``genomepy.utils.filter_fasta``.

    Each entry of ``regexps`` is ``(pattern, match, no_match)``: the number of
    sequences expected in the filtered output of
    ``tests/data/regexp/regexp.fa`` when called with ``v=False`` and with
    ``v=True`` respectively.
    """
    # Function-scope imports so the test does not rely on module-level names.
    import os
    from tempfile import TemporaryDirectory

    fname = "tests/data/regexp/regexp.fa"

    regexps = [
        ("Chr.*", 2, 15),
        ("Scaffold.*", 1, 16),
        ("scaffold_.*", 3, 14),
        (r"^\d+$", 4, 13),
        ("chr.*", 4, 13),
    ]

    # Write the output inside a private temporary directory. Keeping only the
    # ``.name`` of a NamedTemporaryFile lets the file object be collected (and
    # the file deleted) immediately, which leaves an unreserved path and no
    # cleanup for whatever is later written there.
    with TemporaryDirectory() as tmpdir:
        tmpfa = os.path.join(tmpdir, "filtered.fa")
        for regex, match, no_match in regexps:
            # force=True because the same output path is reused on every call.
            fa = genomepy.utils.filter_fasta(
                fname, tmpfa, regex=regex, v=False, force=True
            )
            assert len(fa.keys()) == match
            fa = genomepy.utils.filter_fasta(
                fname, tmpfa, regex=regex, v=True, force=True
            )
            assert len(fa.keys()) == no_match
Source: vanheeringen-lab/genomepy, tests/test_5_install_options.py (view on GitHub)
def test_regexp_filter():
    """Check the sequence counts produced by ``genomepy.utils.filter_fasta``.

    Each entry of ``regexps`` is ``(pattern, match, no_match)``: the number of
    sequences expected in the filtered output of
    ``tests/data/regexp/regexp.fa`` when called with ``v=False`` and with
    ``v=True`` respectively.
    """
    # Function-scope imports so the test does not rely on module-level names.
    import os
    from tempfile import TemporaryDirectory

    fname = "tests/data/regexp/regexp.fa"

    regexps = [
        ("Chr.*", 2, 15),
        ("Scaffold.*", 1, 16),
        ("scaffold_.*", 3, 14),
        (r"^\d+$", 4, 13),
        ("chr.*", 4, 13),
    ]

    # Write the output inside a private temporary directory. Keeping only the
    # ``.name`` of a NamedTemporaryFile lets the file object be collected (and
    # the file deleted) immediately, which leaves an unreserved path and no
    # cleanup for whatever is later written there.
    with TemporaryDirectory() as tmpdir:
        tmpfa = os.path.join(tmpdir, "filtered.fa")
        for regex, match, no_match in regexps:
            # force=True because the same output path is reused on every call.
            fa = genomepy.utils.filter_fasta(
                fname, tmpfa, regex=regex, v=False, force=True
            )
            assert len(fa.keys()) == match
            fa = genomepy.utils.filter_fasta(
                fname, tmpfa, regex=regex, v=True, force=True
            )
            assert len(fa.keys()) == no_match
Source: vanheeringen-lab/genomepy, genomepy/provider.py (view on GitHub); the excerpt below begins partway through a method body.
os.rename(fname, fname + ".gz")
                ret = sp.check_call(["gunzip", "-f", fname])
                if ret != 0:
                    raise Exception(f"Error gunzipping genome {fname}")

            # process genome (e.g. masking)
            if hasattr(self, "_post_process_download"):
                self._post_process_download(
                    name=name, localname=localname, out_dir=tmp_dir, mask=mask
                )

            if regex:
                os.rename(fname, fname + "_to_regex")
                infa = fname + "_to_regex"
                outfa = fname
                filter_fasta(infa, outfa, regex=regex, v=invert_match, force=True)

                not_included = [
                    k for k in Fasta(infa).keys() if k not in Fasta(outfa).keys()
                ]

            # bgzip genome if requested
            if bgzip or config.get("bgzip"):
                ret = sp.check_call(["bgzip", "-f", fname])
                if ret != 0:
                    raise Exception(f"Error bgzipping {name}. Is tabix installed?")
                fname += ".gz"

            # transfer the genome from the tmpdir to the genome_dir
            src = fname
            dst = os.path.join(genomes_dir, localname, os.path.basename(fname))
            shutil.move(src, dst)