Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
| chr1 | 46918271 | 46978908 | + | 60637 |
| chr1 | 97355021 | 97391587 | + | 36566 |
| chr1 | 57284999 | 57323542 | + | 38543 |
| ... | ... | ... | ... | ... |
| chrY | 31665821 | 31692660 | - | 26839 |
| chrY | 20236607 | 20253473 | - | 16866 |
| chrY | 33255377 | 33315933 | - | 60556 |
| chrY | 31182964 | 31205467 | - | 22503 |
+--------------+-----------+-----------+--------------+-----------+
Stranded PyRanges object has 1,000 rows and 5 columns from 24 chromosomes.
For printing, the PyRanges was sorted on Chromosome and Strand.
"""
if chromsizes is None:
from pyranges import data
chromsizes = data.chromsizes()
df = chromsizes.df
elif isinstance(chromsizes, dict):
df = pd.DataFrame({"Chromosome": list(chromsizes.keys()), "End": list(chromsizes.values())})
else:
df = chromsizes.df
p = df.End / df.End.sum()
n_per_chrom = pd.Series(np.random.choice(
df.index, size=n, p=p)).value_counts(sort=False).to_frame()
n_per_chrom.insert(1, "Chromosome", df.loc[n_per_chrom.index].Chromosome)
n_per_chrom.columns = "Count Chromosome".split()
random_dfs = []
for _, (count, chrom) in n_per_chrom.iterrows():
r = np.random.randint(
elif how in ["upstream", "downstream"] and not kwargs["stranded"]:
__nearest = {"upstream": nearest_previous, "downstream": nearest_next}[how]
else:
__nearest = nearest
df = __nearest(d1, d2, **kwargs)
return df
# Ad-hoc timing script, run only when this module is executed directly:
# builds two random PyRanges and times a k_nearest call between them.
if __name__ == "__main__":
import pyranges as pr
import numpy as np
# Seed NumPy's global RNG so repeated runs use the same random draws.
np.random.seed(0)
# NOTE(review): despite the name `chrM`, this holds whatever
# pr.data.chromsizes() returns; the filter that would restrict it to the
# "chrM" chromosome is commented out on the next line.
chrM = pr.data.chromsizes()
# chrM = chrM[chrM.Chromosome == "chrM"]
# Number of intervals requested for each random PyRanges.
size = int(1e5)
# Print the order of magnitude of the benchmark size.
print(np.log10(size))
# Half the size; used below to build a k-array of length 2 * half_size.
half_size = int(size / 2)
strand = True
# Two independent random interval sets over the same chromsizes, both sorted.
gr = pr.random(size, chromsizes=chrM, strand=strand).sort()
gr2 = pr.random(size, chromsizes=chrM, strand=strand).sort()
# Attach a running integer ID column (0..len-1) to each set.
gr.ID = np.arange(len(gr))
gr2.ID = np.arange(len(gr2))
from time import time
# Timed region starts here; note that building `ks` below is included in it.
start = time()
# Alternating k values 1, 2, 1, 2, ... (length 2 * half_size).
# Presumably one k per row of `gr` - TODO confirm len(gr) == size.
ks = np.array([1, 2] * half_size, dtype=int)
result = gr.k_nearest(gr2, k=ks, strandedness=None, overlap=True, ties="different")
end = time()
# NOTE(review): `result` and `end` are not read within the visible lines;
# presumably the elapsed time (end - start) is reported after this point.