Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
41 0.41 188 9956 0.018883
42 0.42 230 9956 0.023102
43 0.43 197 9956 0.019787
44 0.44 224 9956 0.022499
45 0.45 184 9956 0.018481
46 0.46 198 9956 0.019888
47 0.47 187 9956 0.018783
48 0.48 200 9956 0.020088
49 0.49 194 9956 0.019486
"""
self = self.pr
kwargs = {}
kwargs["sparse"] = {"self": True, "other": True}
kwargs = pr.pyranges.fill_kwargs(kwargs)
result = pyrange_apply(_relative_distance, self, other, **kwargs) # pylint: disable=E1132
result = pd.Series(np.concatenate(list(result.values())))
not_nan = ~np.isnan(result)
result.loc[not_nan] = np.floor(result[not_nan] * 100) / 100
vc = result.value_counts(dropna=False).to_frame().reset_index()
vc.columns = "reldist count".split()
vc.insert(vc.shape[1], "total", len(result))
vc.insert(vc.shape[1], "fraction", vc["count"] / len(result))
vc = vc.sort_values("reldist", ascending=True)
vc = vc.reset_index(drop=True)
return vc
| 1 | ensembl_havana | intron | 960800 | 961292 | . | + | . | protein_coding | ... |
| 1 | ensembl_havana | intron | 961552 | 961628 | . | + | . | protein_coding | ... |
| 1 | ensembl_havana | intron | 961750 | 961825 | . | + | . | protein_coding | ... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1 | havana | intron | 732207 | 732980 | . | - | . | transcribed_processed_pseudogene | ... |
| 1 | havana_tagene | intron | 168165 | 169048 | . | - | . | lncRNA | ... |
| 1 | havana_tagene | intron | 165942 | 167958 | . | - | . | lncRNA | ... |
| 1 | havana_tagene | intron | 168165 | 169048 | . | - | . | lncRNA | ... |
+--------------+----------------+------------+-----------+-----------+------------+--------------+------------+----------------------------------+-------+
Stranded PyRanges object has 1,043 rows and 28 columns from 1 chromosomes.
For printing, the PyRanges was sorted on Chromosome and Strand.
19 hidden columns: gene_id, gene_name, gene_source, gene_version, tag, transcript_biotype, transcript_id, transcript_name, transcript_source, transcript_support_level, ... (+ 9 more.)
"""
kwargs = {"by": by, "nb_cpu": nb_cpu}
kwargs = pr.pyranges.fill_kwargs(kwargs)
assert by in ["gene", "transcript"]
id_column = by_to_id[by]
gr = self.pr.sort(id_column)
if not len(gr):
return pr.PyRanges()
exons = gr.subset(lambda df: df.Feature == "exon")
exons = exons.merge(by=id_column)
by_gr = gr.subset(lambda df: df.Feature == by)
result = pyrange_apply(_introns2, by_gr, exons, **kwargs)
--------
pyranges.statistics.forbes : compute the Forbes coefficient
Examples
--------
>>> gr, gr2 = pr.data.chipseq(), pr.data.chipseq_background()
>>> chromsizes = pr.data.chromsizes()
>>> gr.stats.jaccard(gr2, chromsizes=chromsizes)
6.657941988519211e-05"""
self = self.pr
kwargs["sparse"] = {"self": True, "other": True}
kwargs = pr.pyranges.fill_kwargs(kwargs)
strand = True if kwargs.get("strandedness") else False
intersection_sum = sum(
v.sum()
for v in self.set_intersect(other).lengths(as_dict=True).values())
union_sum = 0
for gr in [self, other]:
union_sum += sum(
v.sum() for v in gr.merge(strand=strand).lengths(as_dict=True).values())
denominator = (union_sum - intersection_sum)
if denominator == 0:
return 1
else:
jc = intersection_sum / denominator
Examples
--------
>>> gr, gr2 = pr.data.chipseq(), pr.data.chipseq_background()
>>> chromsizes = pr.data.chromsizes()
>>> gr.stats.forbes(gr2, chromsizes=chromsizes)
1.7168314674978278"""
chromsizes = chromsizes_as_int(chromsizes)
self = self.pr
kwargs = {}
kwargs["sparse"] = {"self": True, "other": True}
kwargs = pr.pyranges.fill_kwargs(kwargs)
strand = True if kwargs.get("strandedness") else False
reference_length = self.merge(strand=strand).length
query_length = other.merge(strand=strand).length
intersection_sum = sum(
v.sum()
for v in self.set_intersect(
other, strandedness=strandedness).lengths(as_dict=True).values())
forbes = chromsizes * intersection_sum / (reference_length * query_length)
return forbes