How to use the pyranges.PyRanges class in pyranges

To help you get started, we’ve selected a few pyranges examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github biocore-ntnu / pyranges / tests / test_nearest_previous.py View on Github external
def expected_result_previous_bed_opposite_stranded(names):
    """Return the expected two-row result wrapped in a PyRanges.

    The rows are parsed from an inline space-separated table, the parsed
    frame is printed, and the frame is then handed to ``PyRanges``. Judging
    by the function name, this is the expected output for an
    opposite-strand "nearest previous" query. The ``names`` argument is
    accepted but not read in this body.
    """
    column_names = "Chromosome  Start  End  Name Score Strand Start_b  End_b  Name_b Score_b Strand_b Distance".split()

    # One expected row per line; fields are separated by single spaces.
    table = """chr1 8 9 h 0 + 6 7 f 0 - 2
chr1 5 7 h 0 - 1 2 f 0 + 4"""

    expected = pd.read_table(
        StringIO(table),
        sep=" ",
        header=None,
        names=column_names,
    )
    print(expected)

    return PyRanges(expected)
github biocore-ntnu / pyranges / tests / test_unary.py View on Github external
def test_init(gr, strand):
    """Construct a PyRanges from the four sequences unpacked from ``gr``.

    ``gr`` is unpacked into chromosomes, starts, ends and strands. The
    strands are passed to the constructor only when ``strand`` is truthy.
    The constructed object is discarded and nothing is asserted, so the
    test passes as long as construction does not raise.
    """
    chromosomes, starts, ends, strands = gr

    ctor_kwargs = {"chromosomes": chromosomes, "starts": starts, "ends": ends}
    if strand:
        ctor_kwargs["strands"] = strands

    pr.PyRanges(**ctor_kwargs)
github biocore-ntnu / pyranges / tests / test_hypothesis.py View on Github external
gr2.df.to_csv(f2, sep="\t", header=False, index=False)

        cmd = join_command.format(bedtools_strand, f1, f2)
        print(cmd)
        result = subprocess.check_output(cmd, shell=True, executable="/bin/bash").decode()

        bedtools_df = pd.read_table(StringIO(result), header=None, squeeze=True, names="Chromosome Start End Name Score Strand Chromosome_b Start_b End_b Name_b Score_b Strand_b Overlap".split(), dtype={"Chromosome": "category", "Strand": "category"} )
        bedtools_df = bedtools_df.drop(["Overlap", "Chromosome_b"], 1)

    print("gr\n", gr)
    print("gr2\n", gr2)

    result = gr.join(gr2, strandedness=strandedness)

    print("result\n", result)
    print("bedtools\n", PyRanges(bedtools_df))

    if result.df.empty:
        assert bedtools_df.empty
    else:
        assert_df_equal(result.df, bedtools_df)
github biocore-ntnu / pyranges / tests / hypothesis_helper.py View on Github external
# strand = draw(use_strand)
    df.loc[:, "End"] += df.Start

    df.insert(3, "Name", "a")
    df.insert(4, "Score", 0)
    df.insert(5, "gene_id", ids)
    df.insert(6, "exon_id", list(range(len(df))))

    # df.Start = df.Start.astype(np.int32)
    # df.End = df.End.astype(np.int32)
    # print(df.dtypes)
    # stranded = draw(st.booleans())
    # if not strand:
    #     df = df.drop("Strand", axis=1)

    gr = PyRanges(df)
    # print(gr)
    # raise
    # gr = PyRanges(df)

    # do not sort like this, use pyranges sort
    # np.random.seed(draw(st.integers(min_value=0, max_value=int(1e6))))
    # gr.df = df.reindex(np.random.permutation(df.index.values))

    return gr
github mirnylab / bioframe / tests / test_ops.py View on Github external
def bioframe_to_pyranges(df):
    """Convert a frame with ``chrom``/``start``/``end`` columns to a PyRanges.

    The input is copied first, so the caller's frame is left untouched; the
    three coordinate columns are renamed to ``Chromosome``/``Start``/``End``
    on the copy before it is passed to ``pr.PyRanges``.
    """
    column_map = {"chrom": "Chromosome", "start": "Start", "end": "End"}
    renamed = df.copy().rename(column_map, axis="columns")
    return pr.PyRanges(renamed)
github biocore-ntnu / pyranges / tests / hypothesis_helper.py View on Github external
def dfs_no_min(draw):  # nosec
    """Draw a frame from ``better_dfs_no_min`` and wrap it in a PyRanges.

    ``Start`` is added onto the ``End`` column in place (presumably ``End``
    is drawn as a length -- confirm against the strategy definition), then
    constant ``Name`` ("a") and ``Score`` (0) columns are inserted at
    positions 3 and 4. The PyRanges is built with ``int64=True``.
    """
    frame = draw(better_dfs_no_min)

    frame.loc[:, "End"] += frame.Start

    # Insert the constant columns in order so they land at positions 3 and 4.
    for position, (label, value) in enumerate((("Name", "a"), ("Score", 0)), start=3):
        frame.insert(position, label, value)

    # NOTE: do not shuffle the rows by reindexing the frame here;
    # use pyranges' own sort instead.
    return PyRanges(frame, int64=True)
github biocore-ntnu / pyranges / pyranges / db / ucsc.py View on Github external
"Chromosome": "category",
        "TranscriptID": "category",
        "GeneID": "category",
        "Strand": "category",
        "Start": int,
        "End": int
    })

    exons = _exons(df)
    _df = (df.drop("XS XE".split(), axis=1).assign(Feature="transcript"))

    df = pd.concat([_df, exons], sort=False).sort_values(
        "Chromosome Start End".split()
    )["Chromosome Start End Strand Feature TranscriptID ExonNumber".split()]

    return pr.PyRanges(df)
github broadinstitute / ABC-Enhancer-Gene-Prediction / src / tools.py View on Github external
def df_to_pyranges(df, start_col='start', end_col='end', chr_col='chr', start_slop=0, end_slop=0):
    """Add PyRanges coordinate columns to ``df`` and wrap it in a PyRanges.

    NOTE: ``df`` is modified in place -- ``Chromosome``, ``Start`` and
    ``End`` columns are written onto the caller's frame.

    Args:
        df: frame holding the source coordinate columns.
        start_col: name of the column copied (minus ``start_slop``) to ``Start``.
        end_col: name of the column copied (plus ``end_slop``) to ``End``.
        chr_col: name of the column copied to ``Chromosome``.
        start_slop: amount subtracted from every start.
        end_slop: amount added to every end.

    Returns:
        ``pr.PyRanges(df)`` built from the augmented frame.
    """
    # Assign one column at a time, in this order, so each lookup sees any
    # column written by the previous assignment.
    df['Chromosome'] = df[chr_col]

    widened_start = df[start_col] - start_slop
    df['Start'] = widened_start

    widened_end = df[end_col] + end_slop
    df['End'] = widened_end

    return pr.PyRanges(df)
github biocore-ntnu / pyranges / pyranges / methods / call.py View on Github external
if not result:
            return pr.PyRanges()

        first_hit = list(result.values())[0]

        if isinstance(first_hit, pd.Series):
            if first_hit.dtype == bool and subset:
                return self[result]
            elif col:
                self.__setattr__(col, result)
                return self
            else:
                raise Exception(
                    "Cannot return PyRanges when function returns a Series! Use as_pyranges=False."
                )
        return pr.PyRanges(result)
    else:
        return result
github biocore-ntnu / pyranges / pyranges / methods / attr.py View on Github external
_column = column[k].values
            else:
                _column = column[k]

            df.insert(pos, column_name, _column)
        else:
            df.insert(pos, column_name, column)

        start_length = end_length

        dfs[k] = df

    if column_name not in ["Chromosome", "Strand"]:
        self.__dict__["dfs"] = dfs
    else:
        self.__dict__["dfs"] = pr.PyRanges(pr.PyRanges(dfs).df).dfs # will merge the dfs, then split on keys again to ensure they are correct