How to use the snps.utils.save_df_as_csv function in snps

To help you get started, we’ve selected a few examples of snps.utils.save_df_as_csv, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github apriha / snps / src / snps / snps_collection.py View on Github external
if not self._name:
                prefix = ""
            else:
                prefix = "{}_".format(clean_str(self._name))

            if 0 < len(discrepant_positions) < discrepant_snp_positions_threshold:
                logger.warning(
                    "{} SNP positions were discrepant; keeping original positions".format(
                        str(len(discrepant_positions))
                    )
                )

                if save_output:
                    self._discrepant_positions_file_count += 1
                    save_df_as_csv(
                        discrepant_positions,
                        self._output_dir,
                        "{}discrepant_positions_{}{}".format(
                            prefix, str(self._discrepant_positions_file_count), ".csv"
                        ),
                    )
            elif len(discrepant_positions) >= discrepant_snp_positions_threshold:
                logger.warning(
                    "too many SNPs differ in position; ensure same genome build is being used"
                )
                return discrepant_positions, discrepant_genotypes

            # remove null genotypes
            common_snps = common_snps.loc[
                ~common_snps["genotype"].isnull()
                & ~common_snps["genotype_added"].isnull()
github apriha / snps / src / snps / io / writer.py View on Github external
# create the VCF representation for SNPs
        results = map(self._create_vcf_representation, tasks)

        contigs = []
        vcf = [pd.DataFrame()]
        for result in list(results):
            contigs.append(result["contig"])
            vcf.append(result["vcf"])

        vcf = pd.concat(vcf)

        comment += "".join(contigs)
        comment += '##FORMAT=\n'
        comment += "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSAMPLE\n"

        return save_df_as_csv(
            vcf,
            self._snps._output_dir,
            filename,
            comment=comment,
            prepend_info=False,
            header=False,
            index=False,
            na_rep=".",
            sep="\t",
        )
github apriha / snps / src / snps / __init__.py View on Github external
== common_snps["genotype_added"].str[0]
                        )
                    )
                )
            ]

            if 0 < len(discrepant_genotypes) < discrepant_genotypes_threshold:
                logger.warning(
                    "{} SNP genotypes were discrepant; marking those as null".format(
                        str(len(discrepant_genotypes))
                    )
                )

                if save_output:
                    self._discrepant_genotypes_file_count += 1
                    save_df_as_csv(
                        discrepant_genotypes,
                        self._output_dir,
                        "{}discrepant_genotypes_{}{}".format(
                            prefix, str(self._discrepant_genotypes_file_count), ".csv"
                        ),
                    )
            elif len(discrepant_genotypes) >= discrepant_genotypes_threshold:
                logger.warning(
                    "too many SNPs differ in their genotype; ensure file is for same "
                    "individual"
                )
                return discrepant_positions, discrepant_genotypes

            # add new SNPs
            self._source.extend(source)
            self._snps = self._snps.combine_first(snps)
github apriha / lineage / src / lineage / __init__.py View on Github external
& (df[genotype1].str[1] != df[genotype3].str[0])
                        & (df[genotype1].str[1] != df[genotype3].str[1])
                    )
                )
                | (
                    df[genotype2].notnull()
                    & df[genotype3].notnull()
                    & (df[genotype2].str.len() == 2)
                    & (df[genotype2].str[0] == df[genotype2].str[1])
                    & (df[genotype2] == df[genotype3])
                    & (df[genotype1] != df[genotype2])
                )
            ]

            if save_output:
                save_df_as_csv(
                    df,
                    self._output_dir,
                    "discordant_snps_{}_{}_{}_GRCh37.csv".format(
                        individual1.get_var_name(),
                        individual2.get_var_name(),
                        individual3.get_var_name(),
                    ),
                    comment=self._get_csv_header(),
                    prepend_info=False,
                )

        return df
github apriha / snps / src / snps / __init__.py View on Github external
def _save_discrepant_snps_file(self, df, discrepant_snps_type, filename):
    """Write discrepant SNP data to a CSV file via ``save_df_as_csv``.

    Parameters
    ----------
    df
        Data handed unchanged to ``save_df_as_csv`` (presumably a pandas
        DataFrame of discrepant SNPs; confirm against callers).
    discrepant_snps_type
        Label used to build the default filename when `filename` is falsy.
    filename
        Name of the output file. When falsy, a default is generated:
        ``<discrepant_snps_type>.csv``, prefixed with the cleaned
        ``self._name`` and an underscore when ``self._name`` is set.

    Returns
    -------
    Whatever ``save_df_as_csv`` returns for this write.
    """
    if not filename:
        # No explicit name given: derive one, including the cleaned name
        # of this object only when it has one.
        if self._name:
            filename = "{}_{}.csv".format(
                clean_str(self._name), discrepant_snps_type
            )
        else:
            filename = "{}.csv".format(discrepant_snps_type)

    # Comment line recording the data source(s), passed along with the data.
    source_comment = "# Source(s): {}\n".format(self.source)

    return save_df_as_csv(
        df, self._output_dir, filename, comment=source_comment
    )
github apriha / snps / src / snps / __init__.py View on Github external
== common_snps["genotype_added"].str[0]
                        )
                    )
                )
            ]

            if 0 < len(discrepant_genotypes) < discrepant_genotypes_threshold:
                print(
                    "{} SNP genotypes were discrepant; marking those as null".format(
                        str(len(discrepant_genotypes))
                    )
                )

                if save_output:
                    self._discrepant_genotypes_file_count += 1
                    save_df_as_csv(
                        discrepant_genotypes,
                        self._output_dir,
                        "{}discrepant_genotypes_{}{}".format(
                            prefix, str(self._discrepant_genotypes_file_count), ".csv"
                        ),
                    )
            elif len(discrepant_genotypes) >= discrepant_genotypes_threshold:
                print(
                    "too many SNPs differ in their genotype; ensure file is for same "
                    "individual"
                )
                return discrepant_positions, discrepant_genotypes

            # add new SNPs
            self._source.extend(source)
            self._snps = self._snps.combine_first(snps)
github apriha / snps / src / snps / __init__.py View on Github external
if not self._name:
                prefix = ""
            else:
                prefix = "{}_".format(clean_str(self._name))

            if 0 < len(discrepant_positions) < discrepant_snp_positions_threshold:
                print(
                    "{} SNP positions were discrepant; keeping original positions".format(
                        str(len(discrepant_positions))
                    )
                )

                if save_output:
                    self._discrepant_positions_file_count += 1
                    save_df_as_csv(
                        discrepant_positions,
                        self._output_dir,
                        "{}discrepant_positions_{}{}".format(
                            prefix, str(self._discrepant_positions_file_count), ".csv"
                        ),
                    )
            elif len(discrepant_positions) >= discrepant_snp_positions_threshold:
                print(
                    "too many SNPs differ in position; ensure same genome build is being used"
                )
                return discrepant_positions, discrepant_genotypes

            # remove null genotypes
            common_snps = common_snps.loc[
                ~common_snps["genotype"].isnull()
                & ~common_snps["genotype_added"].isnull()
github apriha / snps / src / snps / snps_collection.py View on Github external
== common_snps["genotype_added"].str[0]
                        )
                    )
                )
            ]

            if 0 < len(discrepant_genotypes) < discrepant_genotypes_threshold:
                logger.warning(
                    "{} SNP genotypes were discrepant; marking those as null".format(
                        str(len(discrepant_genotypes))
                    )
                )

                if save_output:
                    self._discrepant_genotypes_file_count += 1
                    save_df_as_csv(
                        discrepant_genotypes,
                        self._output_dir,
                        "{}discrepant_genotypes_{}{}".format(
                            prefix, str(self._discrepant_genotypes_file_count), ".csv"
                        ),
                    )
            elif len(discrepant_genotypes) >= discrepant_genotypes_threshold:
                logger.warning(
                    "too many SNPs differ in their genotype; ensure file is for same "
                    "individual"
                )
                return discrepant_positions, discrepant_genotypes

            # add new SNPs
            self._source.extend(source)
            self._snps = self._snps.combine_first(snps)
github apriha / lineage / src / lineage / __init__.py View on Github external
)

        if len(one_chrom_shared_dna) > 0:
            file = "shared_dna_one_chrom_{}_GRCh37.csv".format(individuals_filename)
            save_df_as_csv(
                one_chrom_shared_dna,
                self._output_dir,
                file,
                comment=self._get_csv_header(),
                prepend_info=False,
                float_format="%.2f",
            )

        if len(two_chrom_shared_dna) > 0:
            file = "shared_dna_two_chroms_{}_GRCh37.csv".format(individuals_filename)
            save_df_as_csv(
                two_chrom_shared_dna,
                self._output_dir,
                file,
                comment=self._get_csv_header(),
                prepend_info=False,
                float_format="%.2f",
            )

        if len(one_chrom_shared_genes) > 0:
            file = "shared_genes_one_chrom_{}_GRCh37.csv".format(individuals_filename)
            save_df_as_csv(
                one_chrom_shared_genes,
                self._output_dir,
                file,
                comment=self._get_csv_header(),
                prepend_info=False,