Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_save_discrepant_genotypes(self):
    """Check that discrepant genotypes can be saved after a two-file load.

    Loads the NCBI36 and GRCh37 inputs, expects exactly one discrepant
    genotype, then verifies the path returned by
    ``save_discrepant_genotypes`` both resolves (relative to the current
    working directory) to ``output/discrepant_genotypes.csv`` and exists.
    """
    collection = SNPsCollection()
    collection.load_snps(["tests/input/NCBI36.csv", "tests/input/GRCh37.csv"])
    self.assertEqual(len(collection.discrepant_genotypes), 1)

    saved_path = collection.save_discrepant_genotypes()
    relative_path = os.path.relpath(saved_path)
    self.assertEqual(relative_path, "output/discrepant_genotypes.csv")
    self.assertTrue(os.path.exists(saved_path))
def test_load_snps_None(self):
    """Check that ``load_snps(None)`` raises ``TypeError``."""
    collection = SNPsCollection()
    # Callable form of assertRaises: invokes collection.load_snps(None).
    self.assertRaises(TypeError, collection.load_snps, None)
"discrepant_genotype": bool,
},
)
df1 = df[["chrom", "pos_file1", "genotype_file1"]]
df2 = df[["chrom", "pos_file2", "genotype_file2"]]
df1.to_csv(
dest1, na_rep="--", header=["chromosome", "position", "genotype"]
)
df2.to_csv(
dest2, na_rep="--", header=["chromosome", "position", "genotype"]
)
sc = SNPsCollection([dest1, dest2])
expected = df[
[
"chrom",
"discrepant_position",
"discrepant_genotype",
"expected_position",
"expected_genotype",
]
]
expected = expected.rename(
columns={"expected_position": "pos", "expected_genotype": "genotype"}
)
expected_snps = SNPs()
expected_snps._snps = expected
expected_snps.sort_snps()
def test_save_discrepant_snps_no_discrepant_snps(self):
    """Check saving discrepant SNPs on a fresh, empty collection.

    A newly constructed collection reports zero discrepant SNPs, and
    ``save_discrepant_snps`` returns a falsy value in that state.
    """
    empty_collection = SNPsCollection()
    discrepant_count = len(empty_collection.discrepant_snps)
    self.assertEqual(discrepant_count, 0)

    save_result = empty_collection.save_discrepant_snps()
    self.assertFalse(save_result)
def test_load_snps_non_existent_file(self):
    """Check that a missing file in the load list leaves the GRCh37 data intact.

    After loading one existing file and one non-existent path, the
    collection's SNPs must exactly equal the GRCh37 fixture frame.
    """
    input_paths = [
        "tests/input/GRCh37.csv",
        "tests/input/non_existent_file.csv",
    ]
    collection = SNPsCollection()
    collection.load_snps(input_paths)

    actual = collection.snps
    expected = self.snps_GRCh37()
    pd.testing.assert_frame_equal(actual, expected, check_exact=True)
def test_save_discrepant_genotypes_no_discrepant_snps(self):
    """Check saving discrepant genotypes on a fresh, empty collection.

    A newly constructed collection reports zero discrepant genotypes, and
    ``save_discrepant_genotypes`` returns a falsy value in that state.
    """
    empty_collection = SNPsCollection()
    discrepant_count = len(empty_collection.discrepant_genotypes)
    self.assertEqual(discrepant_count, 0)

    save_result = empty_collection.save_discrepant_genotypes()
    self.assertFalse(save_result)
def test_load_snps_assembly_mismatch_save_output(self):
    """Check a mismatched-assembly load with ``save_output=True``.

    Both discrepancy CSVs must exist afterwards, the collection must report
    four discrepant positions and one discrepant genotype, and its SNPs must
    exactly equal the ``snps_NCBI36_discrepant_snps`` fixture frame.
    """
    input_paths = ["tests/input/NCBI36.csv", "tests/input/GRCh37.csv"]
    collection = SNPsCollection()
    collection.load_snps(input_paths, save_output=True)

    # Positions file is checked first, then genotypes.
    for output_file in (
        "output/discrepant_positions_1.csv",
        "output/discrepant_genotypes_1.csv",
    ):
        self.assertTrue(os.path.exists(output_file))

    self.assertEqual(len(collection.discrepant_positions), 4)
    self.assertEqual(len(collection.discrepant_genotypes), 1)

    actual = collection.snps
    expected = self.snps_NCBI36_discrepant_snps()
    pd.testing.assert_frame_equal(actual, expected, check_exact=True)
def test_load_snps_assembly_mismatch_exceed_discrepant_positions_threshold(self):
    """Check a mismatched-assembly load with a zero positions threshold.

    With ``discrepant_snp_positions_threshold=0`` no discrepancy CSVs are
    written, four discrepant positions and zero discrepant genotypes are
    reported, and the collection's SNPs exactly equal the ``snps_NCBI36``
    fixture frame.
    """
    input_paths = ["tests/input/NCBI36.csv", "tests/input/GRCh37.csv"]
    collection = SNPsCollection()
    collection.load_snps(input_paths, discrepant_snp_positions_threshold=0)

    # Positions file is checked first, then genotypes.
    for output_file in (
        "output/discrepant_positions_1.csv",
        "output/discrepant_genotypes_1.csv",
    ):
        self.assertFalse(os.path.exists(output_file))

    self.assertEqual(len(collection.discrepant_positions), 4)
    self.assertEqual(len(collection.discrepant_genotypes), 0)

    actual = collection.snps
    expected = self.snps_NCBI36()
    pd.testing.assert_frame_equal(actual, expected, check_exact=True)
def test_load_snps_assembly_mismatch(self):
    """Check a mismatched-assembly load without requesting saved output.

    No discrepancy CSVs should be written, while the collection still
    reports four discrepant positions and one discrepant genotype, and its
    SNPs exactly equal the ``snps_NCBI36_discrepant_snps`` fixture frame.
    """
    sc = SNPsCollection()
    sc.load_snps(["tests/input/NCBI36.csv", "tests/input/GRCh37.csv"])

    # Check the same filenames the save_output=True test expects to exist.
    # The previous "ind_"-prefixed paths matched no file any sibling test
    # produces, so the assertions could never fail.
    # NOTE(review): relies on the output directory being clean before each
    # test, as the threshold test's identical assertions already do; confirm
    # against the test case's setUp/tearDown.
    self.assertFalse(os.path.exists("output/discrepant_positions_1.csv"))
    self.assertFalse(os.path.exists("output/discrepant_genotypes_1.csv"))

    self.assertEqual(len(sc.discrepant_positions), 4)
    self.assertEqual(len(sc.discrepant_genotypes), 1)
    pd.testing.assert_frame_equal(
        sc.snps, self.snps_NCBI36_discrepant_snps(), check_exact=True
    )
def test_source_lineage_file_gzip(self):
    """Check that a gzipped copy of a saved file reloads with the same source.

    Builds a collection from a generic file plus a 23andMe file, saves it,
    gzips the saved file next to it (``<file>.gz``), reloads the gzipped
    copy with ``SNPs`` and compares both the source string and the SNPs
    frame against the collection.
    """
    collection = SNPsCollection("tests/input/GRCh37.csv")
    self.assertEqual(collection.source, "generic")
    collection.load_snps("tests/input/23andme.txt")
    self.assertEqual(collection.source, "generic, 23andMe")

    saved_path = collection.save_snps()
    gzip_path = saved_path + ".gz"

    # Managers exit right-to-left: the gzip stream closes first, then the
    # atomic write, then the input file, same as the nested form.
    with open(saved_path, "rb") as plain_in, atomic_write(
        gzip_path, mode="wb", overwrite=True
    ) as raw_out, gzip.open(raw_out, "wb") as gz_out:
        shutil.copyfileobj(plain_in, gz_out)

    reloaded = SNPs(gzip_path)
    self.assertEqual(reloaded.source, "generic, 23andMe")
    pd.testing.assert_frame_equal(collection.snps, reloaded.snps, check_exact=True)