How to use the pyranges.data.chromsizes function in pyranges

To help you get started, we’ve selected a few pyranges examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

GitHub: biocore-ntnu / pyranges / pyranges / __init__.py (view on GitHub)
| chr1         | 46918271  | 46978908  | +            | 60637     |
    | chr1         | 97355021  | 97391587  | +            | 36566     |
    | chr1         | 57284999  | 57323542  | +            | 38543     |
    | ...          | ...       | ...       | ...          | ...       |
    | chrY         | 31665821  | 31692660  | -            | 26839     |
    | chrY         | 20236607  | 20253473  | -            | 16866     |
    | chrY         | 33255377  | 33315933  | -            | 60556     |
    | chrY         | 31182964  | 31205467  | -            | 22503     |
    +--------------+-----------+-----------+--------------+-----------+
    Stranded PyRanges object has 1,000 rows and 5 columns from 24 chromosomes.
    For printing, the PyRanges was sorted on Chromosome and Strand.
    """

    if chromsizes is None:
        from pyranges import data
        chromsizes = data.chromsizes()
        df = chromsizes.df
    elif isinstance(chromsizes, dict):
        df = pd.DataFrame({"Chromosome": list(chromsizes.keys()), "End": list(chromsizes.values())})
    else:
        df = chromsizes.df

    p = df.End / df.End.sum()

    n_per_chrom = pd.Series(np.random.choice(
        df.index, size=n, p=p)).value_counts(sort=False).to_frame()
    n_per_chrom.insert(1, "Chromosome", df.loc[n_per_chrom.index].Chromosome)
    n_per_chrom.columns = "Count Chromosome".split()

    random_dfs = []
    for _, (count, chrom) in n_per_chrom.iterrows():
        r = np.random.randint(
GitHub: biocore-ntnu / pyranges / pyranges / methods / k_nearest.py (view on GitHub)
elif how in ["upstream", "downstream"] and not kwargs["stranded"]:
        __nearest = {"upstream": nearest_previous, "downstream": nearest_next}[how]
    else:
        __nearest = nearest

    df = __nearest(d1, d2, **kwargs)

    return df


if __name__ == "__main__":
    # Ad-hoc timing script: builds two random, sorted PyRanges of `size`
    # intervals each and times a k_nearest call between them.

    import pyranges as pr
    import numpy as np
    # Seed NumPy's global RNG so repeated runs draw the same random numbers.
    np.random.seed(0)
    # NOTE(review): despite the name, `chrM` holds whatever
    # pr.data.chromsizes() returns; the chrM-only filter on the next line is
    # commented out, so no restriction to a single chromosome is applied here.
    chrM = pr.data.chromsizes()
    # chrM = chrM[chrM.Chromosome == "chrM"]
    size = int(1e5)
    # Print the order of magnitude of the benchmark size (log10 of `size`).
    print(np.log10(size))
    half_size = int(size / 2)
    strand = True

    # Two independently drawn interval sets over the same chromsizes, each
    # sorted before use.
    gr = pr.random(size, chromsizes=chrM, strand=strand).sort()
    gr2 = pr.random(size, chromsizes=chrM, strand=strand).sort()
    # Attach a running integer ID (0..len-1) to each set.
    # NOTE(review): presumably stored as an "ID" column on the PyRanges so
    # that result rows can be traced back to their inputs - confirm.
    gr.ID = np.arange(len(gr))
    gr2.ID = np.arange(len(gr2))

    from time import time
    # The timed region covers both the construction of `ks` and the
    # k_nearest call itself.
    start = time()
    # Alternating 1, 2, 1, 2, ... of length 2 * half_size.
    # NOTE(review): presumably one k per row of `gr`; this relies on
    # len(gr) == size - confirm against k_nearest's contract.
    ks = np.array([1, 2] * half_size, dtype=int)
    result = gr.k_nearest(gr2, k=ks, strandedness=None, overlap=True, ties="different")
    end = time()