How to use the snps.resources.Resources class in snps

To help you get started, we’ve selected a few snps examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github apriha / snps / tests / io / test_reader.py View on Github external
def _setup_gsa_test(resources_dir):
        """Point ``Resources`` at ``resources_dir`` and stage gzipped GSA test maps.

        Parameters
        ----------
        resources_dir : str
            directory that receives the gzipped GSA map files
        """
        # reset resource if already loaded
        resources = Resources()
        resources._resources_dir = resources_dir
        resources._gsa_resources = {}

        # (uncompressed test fixture, name of the gzipped copy to create)
        fixtures = (
            ("tests/resources/gsa_rsid_map.txt", "gsa_rsid_map.txt.gz"),
            ("tests/resources/gsa_chrpos_map.txt", "gsa_chrpos_map.txt.gz"),
        )
        for source_path, gz_name in fixtures:
            gzip_file(source_path, os.path.join(resources_dir, gz_name))
github apriha / snps / tests / io / test_reader.py View on Github external
def _teardown_gsa_test():
        """Set the resources directory back to "resources" and empty the GSA resources."""
        resources = Resources()
        resources._resources_dir = "resources"
        resources._gsa_resources = {}
github apriha / snps / tests / io / test_writer.py View on Github external
def test_save_snps_vcf(self):
        """Save SNPs loaded from a VCF file as VCF and run the VCF parsing tests on it."""
        loaded = SNPs("tests/input/testvcf.vcf")

        # start from an empty set of GRCh37 reference sequences
        resources = Resources()
        resources._reference_sequences["GRCh37"] = {}

        with tempfile.TemporaryDirectory() as tmpdir:
            # gzip the test FASTA into the temporary directory
            fasta_gz = os.path.join(tmpdir, "generic.fa.gz")
            gzip_file("tests/input/generic.fa", fasta_gz)

            # register the test FASTA as reference sequence "1" for GRCh37
            resources._reference_sequences["GRCh37"]["1"] = ReferenceSequence(
                ID="1", path=fasta_gz
            )

            saved_path = os.path.relpath(loaded.save_snps(vcf=True))
            self.assertEqual(saved_path, "output/vcf_GRCh37.vcf")

        self.run_parsing_tests_vcf("output/vcf_GRCh37.vcf")
github apriha / snps / tests / io / test_writer.py View on Github external
def test_save_snps_vcf_phased(self):
        """Save SNPs loaded from a phased VCF file as VCF and check the output path.

        NOTE(review): the visible block ends at the "read saved VCF" comment;
        whatever statements follow it are not shown here.
        """
        # read phased data
        s = SNPs("tests/input/testvcf_phased.vcf")

        # setup resource to use test FASTA reference sequence
        r = Resources()
        r._reference_sequences["GRCh37"] = {}

        with tempfile.TemporaryDirectory() as tmpdir:
            # gzip the test FASTA into the temporary directory
            dest = os.path.join(tmpdir, "generic.fa.gz")
            gzip_file("tests/input/generic.fa", dest)

            seq = ReferenceSequence(ID="1", path=dest)

            # register the test FASTA as reference sequence "1" for GRCh37
            r._reference_sequences["GRCh37"]["1"] = seq

            # save phased data to VCF
            self.assertEqual(
                os.path.relpath(s.save_snps(vcf=True)), "output/vcf_GRCh37.vcf"
            )

        # read saved VCF
github apriha / snps / src / snps / snps.py View on Github external
processes : int
            processes to launch if multiprocessing
        rsids : tuple, optional
            rsids to extract if loading a VCF file
        """
        self._file = file
        self._only_detect_source = only_detect_source
        self._snps = get_empty_snps_dataframe()
        self._duplicate_snps = pd.DataFrame()
        self._discrepant_XY_snps = pd.DataFrame()
        self._source = ""
        self._phased = False
        self._build = 0
        self._build_detected = False
        self._output_dir = output_dir
        self._resources = Resources(resources_dir=resources_dir)
        self._parallelizer = Parallelizer(parallelize=parallelize, processes=processes)

        if file:

            d = self._read_raw_data(file, only_detect_source, rsids)

            # Replace multiple rsids separated by commas in index with the first rsid. E.g. rs1,rs2 -> rs1
            multi_rsids = {
                multi_rsid: multi_rsid.split(",")[0]
                for multi_rsid in list(
                    filter(lambda x: len(x.split(",")) > 1, d["snps"].index)
                )
            }
            d["snps"].rename(index=multi_rsids, inplace=True)

            self._snps = d["snps"]
github apriha / snps / src / snps / __init__.py View on Github external
path to output directory
        resources_dir : str
            name / path of resources directory
        parallelize : bool
            utilize multiprocessing to speedup calculations
        processes : int
            processes to launch if multiprocessing
        """
        self._file = file
        self._only_detect_source = only_detect_source
        self._snps = pd.DataFrame()
        self._source = ""
        self._build = 0
        self._build_detected = False
        self._output_dir = output_dir
        self._resources = Resources(resources_dir=resources_dir)
        self._parallelizer = Parallelizer(parallelize=parallelize, processes=processes)

        if file:

            self._snps, self._source = self._read_raw_data(file, only_detect_source)

            if not self._snps.empty:
                self.sort_snps()

                self._build = self.detect_build()

                if not self._build:
                    self._build = 37  # assume Build 37 / GRCh37 if not detected
                else:
                    self._build_detected = True
github apriha / snps / src / snps / __init__.py View on Github external
processes : int
            processes to launch if multiprocessing
        rsids : tuple, optional
            rsids to extract if loading a VCF file
        """
        self._file = file
        self._only_detect_source = only_detect_source
        self._snps = get_empty_snps_dataframe()
        self._duplicate_snps = pd.DataFrame()
        self._discrepant_XY_snps = pd.DataFrame()
        self._source = ""
        self._phased = False
        self._build = 0
        self._build_detected = False
        self._output_dir = output_dir
        self._resources = Resources(resources_dir=resources_dir)
        self._parallelizer = Parallelizer(parallelize=parallelize, processes=processes)

        if file:

            d = self._read_raw_data(file, only_detect_source, rsids)

            self._snps = d["snps"]
            self._source = d["source"]
            self._phased = d["phased"]

            if not self._snps.empty:
                self.sort_snps()

                if deduplicate:
                    self._deduplicate_rsids()