How to use the bioframe.overlap function in bioframe

To help you get started, we’ve selected a few bioframe examples, based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: mirnylab / bioframe / tests / test_ops.py (view on GitHub)
assert np.sum(pd.isna(b["index_2"].values)) == 2

    b = bioframe.overlap(
        df1,
        df2,
        on=None,
        how="left",
        cols1=("chrom1", "start", "end"),
        cols2=("chrom2", "start2", "end2"),
        return_index=True,
        return_input=False,
    )
    assert np.sum(pd.isna(b["index_2"].values)) == 0

    ### test overlap 'left', 'outer', and 'right'
    b = bioframe.overlap(
        df1,
        df2,
        on=None,
        how="outer",
        cols1=("chrom1", "start", "end"),
        cols2=("chrom2", "start2", "end2"),
    )
    assert len(b) == 3

    b = bioframe.overlap(
        df1,
        df2,
        on=["animal"],
        how="outer",
        cols1=("chrom1", "start", "end"),
        cols2=("chrom2", "start2", "end2"),
Source: mirnylab / bioframe / tests / test_ops.py (view on GitHub)
cols1=("chrom1", "start", "end"),
        cols2=("chrom2", "start2", "end2"),
    )
    assert len(b) == 5

    b = bioframe.overlap(
        df1,
        df2,
        on=["animal"],
        how="inner",
        cols1=("chrom1", "start", "end"),
        cols2=("chrom2", "start2", "end2"),
    )
    assert len(b) == 0

    b = bioframe.overlap(
        df1,
        df2,
        on=["animal"],
        how="right",
        cols1=("chrom1", "start", "end"),
        cols2=("chrom2", "start2", "end2"),
    )
    assert len(b) == 2

    b = bioframe.overlap(
        df1,
        df2,
        on=["animal"],
        how="left",
        cols1=("chrom1", "start", "end"),
        cols2=("chrom2", "start2", "end2"),
Source: mirnylab / bioframe / tests / test_ops.py (view on GitHub)
### test overlap on= [] ###
    df1 = pd.DataFrame(
        [
            ["chr1", 8, 12, "+", "cat"],
            ["chr1", 8, 12, "-", "cat"],
            ["chrX", 1, 8, "+", "cat"],
        ],
        columns=["chrom1", "start", "end", "strand", "animal"],
    )

    df2 = pd.DataFrame(
        [["chr1", 6, 10, "+", "dog"], ["chrX", 7, 10, "-", "dog"]],
        columns=["chrom2", "start2", "end2", "strand", "animal"],
    )

    b = bioframe.overlap(
        df1,
        df2,
        on=["animal"],
        how="left",
        cols1=("chrom1", "start", "end"),
        cols2=("chrom2", "start2", "end2"),
        return_index=True,
        return_input=False,
    )
    assert np.sum(pd.isna(b["index_2"].values)) == 3

    b = bioframe.overlap(
        df1,
        df2,
        on=["strand"],
        how="left",
Source: mirnylab / bioframe / tests / test_ops.py (view on GitHub)
def test_overlap():

    ### test consistency of overlap(how='inner') with pyranges.join ###
    ### note does not test overlap_start or overlap_end columns of bioframe.overlap
    df1 = mock_bioframe()
    df2 = mock_bioframe()
    assert df1.equals(df2) == False
    p1 = bioframe_to_pyranges(df1)
    p2 = bioframe_to_pyranges(df2)
    pp = pyranges_overlap_to_bioframe(p1.join(p2, how=None))[
        ["chrom_1", "start_1", "end_1", "chrom_2", "start_2", "end_2"]
    ]
    bb = bioframe.overlap(df1, df2, how="inner")[
        ["chrom_1", "start_1", "end_1", "chrom_2", "start_2", "end_2"]
    ]
    pp = pp.sort_values(
        ["chrom_1", "start_1", "end_1", "chrom_2", "start_2", "end_2"],
        ignore_index=True)
    bb = bb.sort_values(
        ["chrom_1", "start_1", "end_1", "chrom_2", "start_2", "end_2"],
        ignore_index=True)
    pd.testing.assert_frame_equal(bb, pp, check_dtype=False, check_exact=True)
    print("overlap elements agree")

    ### test overlap on= [] ###
    df1 = pd.DataFrame(
        [
            ["chr1", 8, 12, "+", "cat"],
            ["chr1", 8, 12, "-", "cat"],