Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
grs = []
for _, gdf in natsorted(df.groupby(groupby)):
grs.append(pr.PyRanges(gdf))
clusters = [gr.cluster(strand=strand) for gr in grs]
i = 1
new_clusters = []
for c in clusters:
print("c")
print(c)
c.Cluster = i
i += 1
new_clusters.append(c)
expected = pr.concat(new_clusters).df
expected.loc[:, "Cluster"] = expected.Cluster.astype(np.int32)
# expected = expected.drop_duplicates()
print(expected)
print(result)
assert_df_equal(result, expected)
def test_merge_by(gr, strand):
    """Check ``merge(by="ID")`` against merging each ID group on its own.

    The result under test is ``gr.merge(by="ID")`` with the ``ID`` column
    dropped. The reference is built by splitting ``gr.df`` on ``ID``,
    merging every group independently, and concatenating the pieces.

    ``strand`` is accepted but not used by this test body.
    """
    print(gr)
    result = gr.merge(by="ID").df.drop("ID", axis=1)

    # Reference: one PyRanges per ID group, each merged in isolation.
    per_id_ranges = [pr.PyRanges(group) for _, group in gr.df.groupby("ID")]
    merged_parts = [part.merge() for part in per_id_ranges]
    expected = pr.concat(merged_parts).df

    print(expected)
    print(result)

    assert_df_equal(result, expected)
| (category) | (int32) | (int32) | (int32) | (int32) | (int32) |
|--------------+-----------+-----------+-----------+-----------+-----------|
| chr1 | 0 | 10 | 1 | 0 | 1 |
| chr1 | 10 | 20 | 2 | 2 | 2 |
| chr1 | 20 | 30 | 2 | 2 | 0 |
| chr1 | 30 | 40 | 0 | 1 | 1 |
+--------------+-----------+-----------+-----------+-----------+-----------+
Unstranded PyRanges object has 4 rows and 6 columns from 1 chromosomes.
For printing, the PyRanges was sorted on Chromosome.
"""
kwargs = {"as_pyranges": False, "nb_cpu": nb_cpu, "strandedness": strandedness, "how": how, "nb_cpu": nb_cpu}
names = list(grs.keys())
if features is None:
features = pr.concat(grs.values()).split(between=True)
from pyranges.methods.intersection import _count_overlaps
for name, gr in grs.items():
gr = gr.drop()
res = features.apply_pair(gr, _count_overlaps, **kwargs)
setattr(features, name, res)
setattr(features, name, getattr(features, name).fillna(0))
def to_int(df):
df.loc[:, names] = df[names].astype(np.int32)