How to use the bioframe._process.tsv function in bioframe

To help you get started, we’ve selected a few bioframe examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: mirnylab/bioframe on GitHub, file bioframe/tools.py
'v': v,
        'nonamecheck': False,
    }

    if outer_join:
        if overlap:
            bt_kwargs['wao'] = True
            bt_kwargs['loj'] = False
        else:
            bt_kwargs['wao'] = False
            bt_kwargs['loj'] = True
    else:
        if overlap:
            bt_kwargs['wo'] = True

    with tsv(left) as a, tsv(right) as b:
        out = bedtools.intersect(a=a.name, b=b.name, **bt_kwargs)

    bed1_extra_out = bed1_extra.iloc[out[3]].reset_index(drop=True)

    if v:
        out_final = pd.concat([out, bed1_extra_out], axis=1)
    else:
        if outer_join:
            out[4] = out[4].where(out[4] != '.')
            out[7] = out[7].where(out[7] != '.', -1).astype(int)

            bed2_extra_out = pd.DataFrame.from_items([
                (name, pd.Series(data=None, index=out.index, dtype=series.dtype))
                for name, series in bed2_extra.iteritems()])
            mask = (out[7] != -1)
            bed2_extra_out.loc[mask, :] = bed2_extra.iloc[out[7][mask]].values
Source: mirnylab/bioframe on GitHub, file bioframe/tools.py
def wrapper(**kwargs):
            columns = kwargs.pop('_schema', None)

            run_kws = {}
            pandas_inputs = {}
            for arg in list(kwargs.keys()):
                if arg.startswith('_'):
                    run_kws[arg[1:]] = kwargs.pop(arg)
                elif isinstance(kwargs[arg], pd.DataFrame):
                    tmp_file = tsv(kwargs[arg])
                    pandas_inputs[arg] = tmp_file
                    kwargs[arg] = tmp_file.name

            cmd = ['bedtools', name]
            for k, v in kwargs.items():
                if isinstance(v, bool):
                    if not v: continue
                    cmd.append('-{}'.format(k))
                else:
                    cmd.append('-{}'.format(k))
                    cmd.append(str(v))

            try:
                out = run(cmd, **run_kws)
            finally:
                for tmp_file in pandas_inputs.values():
Source: mirnylab/bioframe on GitHub, file bioframe/frameops.py
def frac_gene_coverage(bintable, mrna):
    """
    Annotate bins with mRNA overlap statistics from ``bedtools coverage``.

    Parameters
    ----------
    bintable : pandas.DataFrame
        Bins to annotate. Written to a temporary file via ``tsv`` and passed
        to bedtools as the ``a`` input. Presumably its leading columns are
        chrom/start/end -- confirm against callers.
    mrna : str or pandas.DataFrame
        Either a string handed to ``UCSCClient`` to fetch an mRNA table
        (whose ``tName``/``tStart``/``tEnd`` columns are renamed to
        ``chrom``/``start``/``end``), or a DataFrame that already has
        ``chrom``, ``start`` and ``end`` columns.

    Returns
    -------
    pandas.DataFrame
        A copy of ``bintable`` with two extra columns, ``gene_count`` and
        ``gene_coverage``. The input frame is not modified.
    """
    from .tools import bedtools

    if isinstance(mrna, six.string_types):
        from .resources import UCSCClient
        mrna = UCSCClient(mrna).fetch_mrna().rename(
            columns={'tName': 'chrom', 'tStart': 'start', 'tEnd': 'end'})

    mrna = mrna.sort_values(['chrom', 'start', 'end']).reset_index(drop=True)

    with tsv(bintable) as a, tsv(mrna[['chrom', 'start', 'end']]) as b:
        cov = bedtools.coverage(a=a.name, b=b.name)

    bintable = bintable.copy()

    # Per the bedtools documentation, `coverage` reports each A feature
    # followed by four appended columns: overlap count, covered bases,
    # feature length and covered fraction. Hence -4 is the count and -1 the
    # fraction.
    #
    # Assign by position (`.values`), not by label: `cov` is parsed from
    # bedtools output and does not share `bintable`'s index, so a label-
    # aligned assignment would yield NaN/mismatched rows whenever `bintable`
    # has a non-default index (e.g. after filtering).
    # NOTE(review): assumes bedtools emits exactly one row per input bin, in
    # input order -- a length mismatch now raises instead of passing silently.
    bintable['gene_count'] = cov.iloc[:, -4].values
    bintable['gene_coverage'] = cov.iloc[:, -1].values

    return bintable