Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if not self._name:
prefix = ""
else:
prefix = "{}_".format(clean_str(self._name))
if 0 < len(discrepant_positions) < discrepant_snp_positions_threshold:
logger.warning(
"{} SNP positions were discrepant; keeping original positions".format(
str(len(discrepant_positions))
)
)
if save_output:
self._discrepant_positions_file_count += 1
save_df_as_csv(
discrepant_positions,
self._output_dir,
"{}discrepant_positions_{}{}".format(
prefix, str(self._discrepant_positions_file_count), ".csv"
),
)
elif len(discrepant_positions) >= discrepant_snp_positions_threshold:
logger.warning(
"too many SNPs differ in position; ensure same genome build is being used"
)
return discrepant_positions, discrepant_genotypes
# remove null genotypes
common_snps = common_snps.loc[
~common_snps["genotype"].isnull()
& ~common_snps["genotype_added"].isnull()
# create the VCF representation for SNPs
results = map(self._create_vcf_representation, tasks)
contigs = []
vcf = [pd.DataFrame()]
for result in list(results):
contigs.append(result["contig"])
vcf.append(result["vcf"])
vcf = pd.concat(vcf)
comment += "".join(contigs)
comment += '##FORMAT=\n'
comment += "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSAMPLE\n"
return save_df_as_csv(
vcf,
self._snps._output_dir,
filename,
comment=comment,
prepend_info=False,
header=False,
index=False,
na_rep=".",
sep="\t",
)
== common_snps["genotype_added"].str[0]
)
)
)
]
if 0 < len(discrepant_genotypes) < discrepant_genotypes_threshold:
logger.warning(
"{} SNP genotypes were discrepant; marking those as null".format(
str(len(discrepant_genotypes))
)
)
if save_output:
self._discrepant_genotypes_file_count += 1
save_df_as_csv(
discrepant_genotypes,
self._output_dir,
"{}discrepant_genotypes_{}{}".format(
prefix, str(self._discrepant_genotypes_file_count), ".csv"
),
)
elif len(discrepant_genotypes) >= discrepant_genotypes_threshold:
logger.warning(
"too many SNPs differ in their genotype; ensure file is for same "
"individual"
)
return discrepant_positions, discrepant_genotypes
# add new SNPs
self._source.extend(source)
self._snps = self._snps.combine_first(snps)
& (df[genotype1].str[1] != df[genotype3].str[0])
& (df[genotype1].str[1] != df[genotype3].str[1])
)
)
| (
df[genotype2].notnull()
& df[genotype3].notnull()
& (df[genotype2].str.len() == 2)
& (df[genotype2].str[0] == df[genotype2].str[1])
& (df[genotype2] == df[genotype3])
& (df[genotype1] != df[genotype2])
)
]
if save_output:
save_df_as_csv(
df,
self._output_dir,
"discordant_snps_{}_{}_{}_GRCh37.csv".format(
individual1.get_var_name(),
individual2.get_var_name(),
individual3.get_var_name(),
),
comment=self._get_csv_header(),
prepend_info=False,
)
return df
def _save_discrepant_snps_file(self, df, discrepant_snps_type, filename):
    """Save discrepant SNP data as a CSV file in this object's output directory.

    Parameters
    ----------
    df
        Data to write; handed to ``save_df_as_csv`` unchanged
        (presumably a pandas DataFrame -- confirm against callers).
    discrepant_snps_type : str
        Label used to build the default filename when none is supplied.
    filename : str
        Name of the output file. If falsy, a default of
        ``"<discrepant_snps_type>.csv"`` is used, prefixed with the
        cleaned ``self._name`` and an underscore when a name is set.

    Returns
    -------
    The value returned by ``save_df_as_csv``.
    """
    if not filename:
        # No explicit filename: derive one, including the cleaned name
        # as a prefix only when a name is set.
        filename = (
            "{}_{}.csv".format(clean_str(self._name), discrepant_snps_type)
            if self._name
            else "{}.csv".format(discrepant_snps_type)
        )

    destination_dir = self._output_dir
    # The file is headed by a comment line listing the data source(s).
    source_comment = "# Source(s): {}\n".format(self.source)
    return save_df_as_csv(df, destination_dir, filename, comment=source_comment)
== common_snps["genotype_added"].str[0]
)
)
)
]
if 0 < len(discrepant_genotypes) < discrepant_genotypes_threshold:
print(
"{} SNP genotypes were discrepant; marking those as null".format(
str(len(discrepant_genotypes))
)
)
if save_output:
self._discrepant_genotypes_file_count += 1
save_df_as_csv(
discrepant_genotypes,
self._output_dir,
"{}discrepant_genotypes_{}{}".format(
prefix, str(self._discrepant_genotypes_file_count), ".csv"
),
)
elif len(discrepant_genotypes) >= discrepant_genotypes_threshold:
print(
"too many SNPs differ in their genotype; ensure file is for same "
"individual"
)
return discrepant_positions, discrepant_genotypes
# add new SNPs
self._source.extend(source)
self._snps = self._snps.combine_first(snps)
if not self._name:
prefix = ""
else:
prefix = "{}_".format(clean_str(self._name))
if 0 < len(discrepant_positions) < discrepant_snp_positions_threshold:
print(
"{} SNP positions were discrepant; keeping original positions".format(
str(len(discrepant_positions))
)
)
if save_output:
self._discrepant_positions_file_count += 1
save_df_as_csv(
discrepant_positions,
self._output_dir,
"{}discrepant_positions_{}{}".format(
prefix, str(self._discrepant_positions_file_count), ".csv"
),
)
elif len(discrepant_positions) >= discrepant_snp_positions_threshold:
print(
"too many SNPs differ in position; ensure same genome build is being used"
)
return discrepant_positions, discrepant_genotypes
# remove null genotypes
common_snps = common_snps.loc[
~common_snps["genotype"].isnull()
& ~common_snps["genotype_added"].isnull()
== common_snps["genotype_added"].str[0]
)
)
)
]
if 0 < len(discrepant_genotypes) < discrepant_genotypes_threshold:
logger.warning(
"{} SNP genotypes were discrepant; marking those as null".format(
str(len(discrepant_genotypes))
)
)
if save_output:
self._discrepant_genotypes_file_count += 1
save_df_as_csv(
discrepant_genotypes,
self._output_dir,
"{}discrepant_genotypes_{}{}".format(
prefix, str(self._discrepant_genotypes_file_count), ".csv"
),
)
elif len(discrepant_genotypes) >= discrepant_genotypes_threshold:
logger.warning(
"too many SNPs differ in their genotype; ensure file is for same "
"individual"
)
return discrepant_positions, discrepant_genotypes
# add new SNPs
self._source.extend(source)
self._snps = self._snps.combine_first(snps)
)
if len(one_chrom_shared_dna) > 0:
file = "shared_dna_one_chrom_{}_GRCh37.csv".format(individuals_filename)
save_df_as_csv(
one_chrom_shared_dna,
self._output_dir,
file,
comment=self._get_csv_header(),
prepend_info=False,
float_format="%.2f",
)
if len(two_chrom_shared_dna) > 0:
file = "shared_dna_two_chroms_{}_GRCh37.csv".format(individuals_filename)
save_df_as_csv(
two_chrom_shared_dna,
self._output_dir,
file,
comment=self._get_csv_header(),
prepend_info=False,
float_format="%.2f",
)
if len(one_chrom_shared_genes) > 0:
file = "shared_genes_one_chrom_{}_GRCh37.csv".format(individuals_filename)
save_df_as_csv(
one_chrom_shared_genes,
self._output_dir,
file,
comment=self._get_csv_header(),
prepend_info=False,