How to use the bioframe.cluster function in bioframe

To help you get started, we’ve selected a few examples of bioframe.cluster, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: mirnylab/bioframe, tests/test_ops.py (view on GitHub)
df_annotated = bioframe.cluster(df1)
    assert (
        df_annotated["cluster"].values == np.array([0, 0, 0, 1])
    ).all()  # the last interval does not overlap the first three
    df_annotated = bioframe.cluster(df1, min_dist=2)
    assert (
        df_annotated["cluster"].values == np.array([0, 0, 0, 0])
    ).all()  # all intervals part of the same cluster

    df_annotated = bioframe.cluster(df1, min_dist=None)
    assert (
        df_annotated["cluster"].values == np.array([0, 0, 1, 2])
    ).all()  # adjacent intervals not clustered

    df1.iloc[0, 0] = "chrX"
    df_annotated = bioframe.cluster(df1)
    assert (
        df_annotated["cluster"].values == np.array([2, 0, 0, 1])
    ).all()  # do not cluster intervals across chromosomes

    # test consistency with pyranges (which automatically sorts df upon creation and uses 1-based indexing for clusters)
    assert (
        (bioframe_to_pyranges(df1).cluster(count=True).df["Cluster"].values - 1)
        == bioframe.cluster(df1.sort_values(["chrom", "start"]))["cluster"].values
    ).all()

    # test on=[] argument
    df1 = pd.DataFrame(
        [
            ["chr1", 3, 8, "+", "cat", 5.5],
            ["chr1", 3, 8, "-", "dog", 6.5],
            ["chr1", 6, 10, "-", "cat", 6.5],
Source: mirnylab/bioframe, tests/test_ops.py (view on GitHub)
def test_cluster():
    """Exercise ``bioframe.cluster`` on a four-interval frame.

    The checks below pin the ``"cluster"`` column returned for:

    * the default call,
    * ``min_dist=2``,
    * ``min_dist=None``,
    * the same frame after its first row is moved to ``"chrX"``,

    and finally compare the labels against pyranges' ``cluster(count=True)``
    output (shifted by one) on the coordinate-sorted frame.
    """
    intervals = [
        ["chr1", 1, 5],
        ["chr1", 3, 8],
        ["chr1", 8, 10],
        ["chr1", 12, 14],
    ]
    df1 = pd.DataFrame(intervals, columns=["chrom", "start", "end"])

    # Default arguments: the last interval does not overlap the first three.
    default_labels = bioframe.cluster(df1)["cluster"].values
    assert (default_labels == np.array([0, 0, 0, 1])).all()

    # min_dist=2: all intervals are part of the same cluster.
    wide_labels = bioframe.cluster(df1, min_dist=2)["cluster"].values
    assert (wide_labels == np.array([0, 0, 0, 0])).all()

    # min_dist=None: adjacent intervals are not clustered.
    strict_labels = bioframe.cluster(df1, min_dist=None)["cluster"].values
    assert (strict_labels == np.array([0, 0, 1, 2])).all()

    # Relabel the first row's chromosome in place; intervals must not be
    # clustered across chromosomes.
    df1.iloc[0, 0] = "chrX"
    split_labels = bioframe.cluster(df1)["cluster"].values
    assert (split_labels == np.array([2, 0, 0, 1])).all()

    # Consistency with pyranges, which automatically sorts the frame upon
    # creation and uses 1-based indexing for clusters (hence the ``- 1``).
    pyranges_labels = (
        bioframe_to_pyranges(df1).cluster(count=True).df["Cluster"].values - 1
    )
    sorted_df = df1.sort_values(["chrom", "start"])
    bioframe_labels = bioframe.cluster(sorted_df)["cluster"].values
    assert (pyranges_labels == bioframe_labels).all()
Source: mirnylab/bioframe, tests/test_ops.py (view on GitHub)
df_annotated = bioframe.cluster(df1, min_dist=None)
    assert (
        df_annotated["cluster"].values == np.array([0, 0, 1, 2])
    ).all()  # adjacent intervals not clustered

    df1.iloc[0, 0] = "chrX"
    df_annotated = bioframe.cluster(df1)
    assert (
        df_annotated["cluster"].values == np.array([2, 0, 0, 1])
    ).all()  # do not cluster intervals across chromosomes

    # test consistency with pyranges (which automatically sorts df upon creation and uses 1-based indexing for clusters)
    assert (
        (bioframe_to_pyranges(df1).cluster(count=True).df["Cluster"].values - 1)
        == bioframe.cluster(df1.sort_values(["chrom", "start"]))["cluster"].values
    ).all()

    # test on=[] argument
    df1 = pd.DataFrame(
        [
            ["chr1", 3, 8, "+", "cat", 5.5],
            ["chr1", 3, 8, "-", "dog", 6.5],
            ["chr1", 6, 10, "-", "cat", 6.5],
            ["chrX", 6, 10, "-", "cat", 6.5],
        ],
        columns=["chrom", "start", "end", "strand", "animal", "location"],
    )
    assert (
        bioframe.cluster(df1, on=["animal"])["cluster"].values == np.array([0, 1, 0, 2])
    ).all()
    assert (
Source: mirnylab/bioframe, tests/test_ops.py (view on GitHub)
# test on=[] argument
    df1 = pd.DataFrame(
        [
            ["chr1", 3, 8, "+", "cat", 5.5],
            ["chr1", 3, 8, "-", "dog", 6.5],
            ["chr1", 6, 10, "-", "cat", 6.5],
            ["chrX", 6, 10, "-", "cat", 6.5],
        ],
        columns=["chrom", "start", "end", "strand", "animal", "location"],
    )
    assert (
        bioframe.cluster(df1, on=["animal"])["cluster"].values == np.array([0, 1, 0, 2])
    ).all()
    assert (
        bioframe.cluster(df1, on=["strand"])["cluster"].values == np.array([0, 1, 1, 2])
    ).all()
    assert (
        bioframe.cluster(df1, on=["location", "animal"])["cluster"].values
        == np.array([0, 2, 1, 3])
    ).all()