How to use the pyranges.pyranges.fill_kwargs function in pyranges

To help you get started, we’ve selected a few pyranges examples that show popular ways the `fill_kwargs` function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: biocore-ntnu/pyranges, file pyranges/statistics.py (excerpt; view on GitHub)
41     0.41    188   9956  0.018883
        42     0.42    230   9956  0.023102
        43     0.43    197   9956  0.019787
        44     0.44    224   9956  0.022499
        45     0.45    184   9956  0.018481
        46     0.46    198   9956  0.019888
        47     0.47    187   9956  0.018783
        48     0.48    200   9956  0.020088
        49     0.49    194   9956  0.019486
        """

        self = self.pr

        kwargs = {}
        kwargs["sparse"] = {"self": True, "other": True}
        kwargs = pr.pyranges.fill_kwargs(kwargs)

        result = pyrange_apply(_relative_distance, self, other, **kwargs)  # pylint: disable=E1132

        result = pd.Series(np.concatenate(list(result.values())))

        not_nan = ~np.isnan(result)
        result.loc[not_nan] = np.floor(result[not_nan] * 100) / 100
        vc = result.value_counts(dropna=False).to_frame().reset_index()
        vc.columns = "reldist count".split()
        vc.insert(vc.shape[1], "total", len(result))
        vc.insert(vc.shape[1], "fraction", vc["count"] / len(result))
        vc = vc.sort_values("reldist", ascending=True)
        vc = vc.reset_index(drop=True)

        return vc
Source: biocore-ntnu/pyranges, file pyranges/genomicfeatures.py (excerpt; view on GitHub)
| 1            | ensembl_havana | intron     | 960800    | 961292    | .          | +            | .          | protein_coding                   | ...   |
        | 1            | ensembl_havana | intron     | 961552    | 961628    | .          | +            | .          | protein_coding                   | ...   |
        | 1            | ensembl_havana | intron     | 961750    | 961825    | .          | +            | .          | protein_coding                   | ...   |
        | ...          | ...            | ...        | ...       | ...       | ...        | ...          | ...        | ...                              | ...   |
        | 1            | havana         | intron     | 732207    | 732980    | .          | -            | .          | transcribed_processed_pseudogene | ...   |
        | 1            | havana_tagene  | intron     | 168165    | 169048    | .          | -            | .          | lncRNA                           | ...   |
        | 1            | havana_tagene  | intron     | 165942    | 167958    | .          | -            | .          | lncRNA                           | ...   |
        | 1            | havana_tagene  | intron     | 168165    | 169048    | .          | -            | .          | lncRNA                           | ...   |
        +--------------+----------------+------------+-----------+-----------+------------+--------------+------------+----------------------------------+-------+
        Stranded PyRanges object has 1,043 rows and 28 columns from 1 chromosomes.
        For printing, the PyRanges was sorted on Chromosome and Strand.
        19 hidden columns: gene_id, gene_name, gene_source, gene_version, tag, transcript_biotype, transcript_id, transcript_name, transcript_source, transcript_support_level, ... (+ 9 more.)
        """

        kwargs = {"by": by, "nb_cpu": nb_cpu}
        kwargs = pr.pyranges.fill_kwargs(kwargs)

        assert by in ["gene", "transcript"]

        id_column = by_to_id[by]
        gr = self.pr.sort(id_column)

        if not len(gr):
            return pr.PyRanges()

        exons = gr.subset(lambda df: df.Feature == "exon")
        exons = exons.merge(by=id_column)

        by_gr = gr.subset(lambda df: df.Feature == by)

        result = pyrange_apply(_introns2, by_gr, exons, **kwargs)
Source: biocore-ntnu/pyranges, file pyranges/statistics.py (excerpt; view on GitHub)
--------

        pyranges.statistics.forbes : compute the forbes coefficient

        Examples
        --------

        >>> gr, gr2 = pr.data.chipseq(), pr.data.chipseq_background()
        >>> chromsizes = pr.data.chromsizes()
        >>> gr.stats.jaccard(gr2, chromsizes=chromsizes)
        6.657941988519211e-05"""

        self = self.pr

        kwargs["sparse"] = {"self": True, "other": True}
        kwargs = pr.pyranges.fill_kwargs(kwargs)
        strand = True if kwargs.get("strandedness") else False

        intersection_sum = sum(
            v.sum()
            for v in self.set_intersect(other).lengths(as_dict=True).values())

        union_sum = 0
        for gr in [self, other]:
            union_sum += sum(
                v.sum() for v in gr.merge(strand=strand).lengths(as_dict=True).values())

        denominator = (union_sum - intersection_sum)
        if denominator == 0:
            return 1
        else:
            jc = intersection_sum / denominator
Source: biocore-ntnu/pyranges, file pyranges/statistics.py (excerpt; view on GitHub)
Examples
        --------

        >>> gr, gr2 = pr.data.chipseq(), pr.data.chipseq_background()
        >>> chromsizes = pr.data.chromsizes()
        >>> gr.stats.forbes(gr2, chromsizes=chromsizes)
        1.7168314674978278"""

        chromsizes = chromsizes_as_int(chromsizes)

        self = self.pr

        kwargs = {}
        kwargs["sparse"] = {"self": True, "other": True}
        kwargs = pr.pyranges.fill_kwargs(kwargs)
        strand = True if kwargs.get("strandedness") else False

        reference_length = self.merge(strand=strand).length
        query_length = other.merge(strand=strand).length

        intersection_sum = sum(
            v.sum()
            for v in self.set_intersect(
                    other, strandedness=strandedness).lengths(as_dict=True).values())

        forbes = chromsizes * intersection_sum / (reference_length * query_length)

        return forbes