How to use the bioframe.region.parse_region function in bioframe

To help you get started, we’ve selected a few bioframe examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mirnylab / bioframe / bioframe / formats.py View on Github external
f = pypairix.open(fp, 'r')

    header = f.get_header()
    if len(header):
        header_groups = toolz.groupby(lambda x: x.split(':')[0], header)
        if '#chromsize' in header_groups and chromsizes is None:
            items = [line.split()[1:] for line in header_groups['#chromsize']]
            if len(items) and chromsizes is None:
                names, lengths = zip(*((item[0], int(item[1])) for item in items))
                chromsizes = pd.Series(index=names, data=lengths)
        if '#columns' in header_groups and columns is None:
            columns = header_groups['#columns'][0].split()[1:]

    chrom1, start1, end1 = parse_region(region1, chromsizes)
    if region2 is not None:
        chrom2, start2, end2 = parse_region(region2, chromsizes)
    else:
        chrom2, start2, end2 = chrom1, start1, end1

    it = f.query2D(chrom1, start1, end1, chrom2, start2, end2)
    if usecols is not None:
        argusecols = [columns.index(col) for col in usecols]
        records = [
            (record[i] for i in argusecols) for record in it
        ]
        columns = usecols
    else:
        records = it

    df = pd.DataFrame.from_records(records, columns=columns)
    if columns is not None:
        for col in columns:
github mirnylab / bioframe / bioframe / formats.py View on Github external
if dtypes is None:
        dtypes = {}
    f = pypairix.open(fp, 'r')

    header = f.get_header()
    if len(header):
        header_groups = toolz.groupby(lambda x: x.split(':')[0], header)
        if '#chromsize' in header_groups and chromsizes is None:
            items = [line.split()[1:] for line in header_groups['#chromsize']]
            if len(items) and chromsizes is None:
                names, lengths = zip(*((item[0], int(item[1])) for item in items))
                chromsizes = pd.Series(index=names, data=lengths)
        if '#columns' in header_groups and columns is None:
            columns = header_groups['#columns'][0].split()[1:]

    chrom1, start1, end1 = parse_region(region1, chromsizes)
    if region2 is not None:
        chrom2, start2, end2 = parse_region(region2, chromsizes)
    else:
        chrom2, start2, end2 = chrom1, start1, end1

    it = f.query2D(chrom1, start1, end1, chrom2, start2, end2)
    if usecols is not None:
        argusecols = [columns.index(col) for col in usecols]
        records = [
            (record[i] for i in argusecols) for record in it
        ]
        columns = usecols
    else:
        records = it

    df = pd.DataFrame.from_records(records, columns=columns)
github mirnylab / bioframe / bioframe / frameops.py View on Github external
def bedbisect(bedf, region):
    """Find the positional span of rows in ``bedf`` covering a genomic region.

    The rows of the requested chromosome are located first, and that block
    is then narrowed by binary search on the ``end`` and ``start`` columns.

    Rows must be sorted by `start` and `end`;
    `chrom` must be grouped, but does not have to be sorted.

    Parameters
    ----------
    bedf : table with ``chrom``, ``start`` and ``end`` columns
        Accessed as ``bedf.chrom``, ``bedf['start']`` and ``bedf['end']``;
        presumably a pandas DataFrame -- confirm against callers.
    region : region specification
        Passed unchanged to ``parse_region``, which yields
        ``(chrom, start, end)``.

    Returns
    -------
    (lo, hi)
        Positional bounds used as a slice ``[lo:hi]`` over the rows of
        ``bedf``. When the parsed ``end`` is None, ``hi`` is the upper bound
        of the chromosome block as reported by ``_find_block_span``.

    """
    chrom, start, end = parse_region(region)

    # Restrict the search to the rows belonging to the requested chromosome.
    block_lo, block_hi = _find_block_span(bedf.chrom.values, chrom)

    # Skip the leading rows of the block whose `end` is <= the region start.
    block_ends = bedf['end'].values[block_lo:block_hi]
    lo = block_lo + block_ends.searchsorted(start, side='right')

    # Open-ended region: keep everything up to the end of the chromosome
    # block instead of bisecting on `start`.
    if end is None:
        return lo, block_hi

    # Keep only the remaining rows whose `start` is < the region end.
    remaining_starts = bedf['start'].values[lo:block_hi]
    hi = lo + remaining_starts.searchsorted(end, side='left')
    return lo, hi