Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
'v': v,
'nonamecheck': False,
}
if outer_join:
if overlap:
bt_kwargs['wao'] = True
bt_kwargs['loj'] = False
else:
bt_kwargs['wao'] = False
bt_kwargs['loj'] = True
else:
if overlap:
bt_kwargs['wo'] = True
with tsv(left) as a, tsv(right) as b:
out = bedtools.intersect(a=a.name, b=b.name, **bt_kwargs)
bed1_extra_out = bed1_extra.iloc[out[3]].reset_index(drop=True)
if v:
out_final = pd.concat([out, bed1_extra_out], axis=1)
else:
if outer_join:
out[4] = out[4].where(out[4] != '.')
out[7] = out[7].where(out[7] != '.', -1).astype(int)
bed2_extra_out = pd.DataFrame.from_items([
(name, pd.Series(data=None, index=out.index, dtype=series.dtype))
for name, series in bed2_extra.iteritems()])
mask = (out[7] != -1)
bed2_extra_out.loc[mask, :] = bed2_extra.iloc[out[7][mask]].values
def wrapper(**kwargs):
columns = kwargs.pop('_schema', None)
run_kws = {}
pandas_inputs = {}
for arg in list(kwargs.keys()):
if arg.startswith('_'):
run_kws[arg[1:]] = kwargs.pop(arg)
elif isinstance(kwargs[arg], pd.DataFrame):
tmp_file = tsv(kwargs[arg])
pandas_inputs[arg] = tmp_file
kwargs[arg] = tmp_file.name
cmd = ['bedtools', name]
for k, v in kwargs.items():
if isinstance(v, bool):
if not v: continue
cmd.append('-{}'.format(k))
else:
cmd.append('-{}'.format(k))
cmd.append(str(v))
try:
out = run(cmd, **run_kws)
finally:
for tmp_file in pandas_inputs.values():
def frac_gene_coverage(bintable, mrna):
    """
    Annotate each interval of a bin table with gene overlap statistics.

    Parameters
    ----------
    bintable : pandas.DataFrame
        Intervals to annotate. The table is written out with ``tsv`` and
        passed to ``bedtools coverage`` as the ``a`` input.
    mrna : str or pandas.DataFrame
        Either a string, which is handed to ``UCSCClient`` to fetch an mRNA
        table (its ``tName``/``tStart``/``tEnd`` columns are renamed to
        ``chrom``/``start``/``end``), or a dataframe that already has
        ``chrom``, ``start`` and ``end`` columns.

    Returns
    -------
    pandas.DataFrame
        A copy of ``bintable`` with two added columns: ``gene_count``, taken
        from the fourth-from-last column of the coverage output, and
        ``gene_coverage``, taken from its last column. The input dataframe
        is not modified.
    """
    from .tools import bedtools

    if isinstance(mrna, six.string_types):
        from .resources import UCSCClient
        mrna = UCSCClient(mrna).fetch_mrna().rename(
            columns={'tName': 'chrom', 'tStart': 'start', 'tEnd': 'end'})

    mrna = mrna.sort_values(['chrom', 'start', 'end']).reset_index(drop=True)

    with tsv(bintable) as a, tsv(mrna[['chrom', 'start', 'end']]) as b:
        cov = bedtools.coverage(a=a.name, b=b.name)

    bintable = bintable.copy()
    # Assign by position, not by index label: `cov` is parsed from the tool's
    # output and does not share `bintable`'s index, so a label-aligned
    # assignment would yield NaN/mismatched rows whenever `bintable` has a
    # non-default index.
    # NOTE(review): assumes the coverage output has exactly one row per
    # interval of `a`, in input order -- confirm against the bedtools wrapper.
    bintable['gene_count'] = cov.iloc[:, -4].values
    bintable['gene_coverage'] = cov.iloc[:, -1].values

    return bintable