Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): this excerpt has lost its indentation, so the nesting of the
# branches below is inferred from statement order only -- confirm against the
# original file before relying on it.
# Purpose: convert seq_hic_smoothed into observed/expected values (global or
# local), optionally log-transform and clip them, then optionally convolve
# the result with `kernel`.
if options.as_obsexp:
# compute obs/exp
if options.global_obsexp: # compute global obs/exp
# keep the rows of the expected table whose 'chrom' equals mseq.chr,
# truncated to the first seq_len_pool rows
exp_chr = genome_hic_expected.iloc[ genome_hic_expected['chrom'].values ==mseq.chr][0:seq_len_pool]
if len(exp_chr) ==0:
raise ValueError('no expected values found for chr:'+mseq.chr)
# build the expected matrix: the i-th 'balanced.avg' value is written to
# both the +i and -i diagonals
# NOTE(review): if fewer than seq_len_pool rows matched, values[i] would
# raise IndexError -- confirm the expected table always has enough rows
exp_map= np.zeros((seq_len_pool,seq_len_pool))
for i in range(seq_len_pool):
set_diag(exp_map,exp_chr['balanced.avg'].values[i],i)
set_diag(exp_map,exp_chr['balanced.avg'].values[i],-i)
seq_hic_obsexp = seq_hic_smoothed / exp_map
# diagonals closer to the main diagonal than diagonal_offset are set to 1.0
for i in range(-options.diagonal_offset+1,options.diagonal_offset): set_diag(seq_hic_obsexp,1.0,i)
# re-apply the NaN mask after the division
seq_hic_obsexp[seq_hic_nan] = np.nan
else: # compute local obs/exp
# only the first element of the helper's return value is used
seq_hic_obsexp = observed_over_expected(seq_hic_smoothed, ~seq_hic_nan)[0]
# log
if options.no_log==False:
# log branch: log, clip symmetrically to [-clip, clip], fill NaNs, then
# set the near-diagonal band to 0
seq_hic_obsexp = np.log(seq_hic_obsexp)
seq_hic_obsexp = np.clip(seq_hic_obsexp, -options.clip, options.clip)
seq_hic_obsexp = interp_nan(seq_hic_obsexp)
for i in range(-options.diagonal_offset+1, options.diagonal_offset): set_diag(seq_hic_obsexp, 0,i)
else:
# no-log branch: clip to [0, clip], fill NaNs, then set the
# near-diagonal band to 1
seq_hic_obsexp = np.clip(seq_hic_obsexp, 0, options.clip)
seq_hic_obsexp = interp_nan(seq_hic_obsexp)
for i in range(-options.diagonal_offset+1, options.diagonal_offset): set_diag(seq_hic_obsexp, 1,i)
# apply kernel
if kernel is not None:
seq_hic = convolve(seq_hic_obsexp, kernel)
# NOTE(review): this excerpt begins at an `else:` whose matching `if` is not
# visible, and its indentation has been stripped -- the branch condition and
# nesting cannot be determined from here.
else:
# adaptively coarsegrain based on raw counts
# the second argument is the unbalanced (balance=False) matrix fetched for
# the region string mseq_str
seq_hic_smoothed = adaptive_coarsegrain(
seq_hic_raw,
genome_hic_cool.matrix(balance=False).fetch(mseq_str),
cutoff= 2, max_levels=8)
#todo: pass an option to add a certain pseudocount value, or the minimum nonzero value
if options.as_obsexp == True:
# interpolate single missing bins
seq_hic_interpolated = interpolate_bad_singletons(seq_hic_smoothed, mask=(~seq_hic_nan),
fillDiagonal=True, returnMask=False, secondPass=True,verbose=False)
# recompute the NaN mask from the interpolated matrix
seq_hic_nan = np.isnan(seq_hic_interpolated)
# compute observed/expected
# only the first element of the helper's return value is used
seq_hic_obsexp = observed_over_expected(seq_hic_interpolated, ~seq_hic_nan)[0]
# todo: allow passing a global expected rather than computing locally
# log
seq_hic_obsexp = np.log(seq_hic_obsexp)
# set nan to 0
seq_hic_obsexp = np.nan_to_num(seq_hic_obsexp)
# todo: make obsexp_clip an option for obs/exp
# the clip range is hard-coded to [-2, 2] here
seq_hic = np.clip(seq_hic_obsexp,-2,2)
else:
# interpolate all missing bins
seq_hic_interpolated = interp_nan(seq_hic_smoothed)
# NOTE(review): indentation has been stripped from this excerpt and the `try:`
# that pairs with the `except ValueError:` below is not visible -- which
# statements it guards cannot be determined from here.
# Purpose: fetch the balanced matrix for one region, convert it to
# log(observed/expected), and store it at index si of the 'seqs_hic' dataset.
# rescale
# build the region string "chrom:start-end"; when chr_pre is falsy the first
# three characters of mseq.chr are dropped
if chr_pre:
mseq_str = '%s:%d-%d' % (mseq.chr, mseq.start, mseq.end)
else:
mseq_str = '%s:%d-%d' % (mseq.chr[3:], mseq.start, mseq.end)
seq_hic_raw = genome_hic_cool.matrix(balance=True).fetch(mseq_str)
# interpolate
seq_hic_raw = interpolateNearest(seq_hic_raw)
# find minimum nonzero value
# the smallest strictly positive entry is added to every entry; np.min raises
# ValueError when no entry is > 0
seq_hic_min = np.min(seq_hic_raw[seq_hic_raw > 0])
seq_hic_raw += seq_hic_min
# compute observed/expected
seq_hic_nan = np.isnan(seq_hic_raw)
# only the first element of the helper's return value is used
seq_hic_obsexp = observed_over_expected(seq_hic_raw, ~seq_hic_nan)[0]
# log
seq_hic_obsexp = np.log(seq_hic_obsexp)
# set nan to 0
# NOTE(review): np.nan_to_num also replaces +/-inf with very large finite
# values and no clip is visible afterwards -- confirm zeros cannot reach np.log
seq_hic_obsexp = np.nan_to_num(seq_hic_obsexp)
except ValueError:
# fallback: warn and store an all-zero float16 matrix for this sequence
print("WARNING: %s doesn't see %s. Setting to all zeros." % (genome_hic_file, mseq_str))
seq_hic_obsexp = np.zeros((seq_len_pool,seq_len_pool), dtype='float16')
# write
seqs_hic_open['seqs_hic'][si,:] = seq_hic_obsexp
# close sequences coverage file
seqs_hic_open.close()